From 9672b3e150d451b2d9be604529bd584015d332cc Mon Sep 17 00:00:00 2001 From: collijk Date: Mon, 13 May 2024 10:38:29 -0700 Subject: [PATCH 1/4] Fix infra --- .gitignore | 3 +++ .pre-commit-config.yaml | 5 ----- pyproject.toml | 8 +++++++- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index 1d41080..66da313 100644 --- a/.gitignore +++ b/.gitignore @@ -77,3 +77,6 @@ venv.bak/ # IDEs .idea/ + +# Jupyter scratch notebooks +notebooks/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index aaf9594..764b8e8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -25,11 +25,6 @@ repos: # Add --fix, in case you want it to autofix when this hook runs entry: poetry run ruff check --force-exclude --fix require_serial: true - - id: ruff - name: ruff - # Add --fix, in case you want it to autofix when this hook runs - entry: poetry run ruff check --force-exclude - require_serial: true language: system types: [ python ] - id: mypy diff --git a/pyproject.toml b/pyproject.toml index cabf98b..9464333 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,6 +55,9 @@ my-cli = "climate_downscale.cli:main" [tool.ruff] target-version = "py310" # The lowest supported version +exclude = [ + "temperature", +] [tool.ruff.lint] # By default, enable all the lint rules. @@ -90,7 +93,7 @@ classmethod-decorators = [ ] [tool.ruff.lint.flake8-tidy-imports] -ban-relative-imports = true +ban-relative-imports = "all" [tool.pytest.ini_options] addopts = """\ @@ -112,6 +115,9 @@ exclude_lines = [ # Avoid changing this! strict = true # See all the enabled flags `mypy --help | grep -A 10 'Strict mode'` disallow_any_unimported = true +exclude = [ + "temperature", +] # If you need to ignore something for some specific module, # add overrides for them. Avoid changing the global config! From 8e2fc9bb7717c4ea71d7fc2bf68303f344ed392b Mon Sep 17 00:00:00 2001 From: collijk Date: Mon, 13 May 2024 10:40:32 -0700 Subject: [PATCH 2/4] infra --- README.md | 8 ++++---- pyproject.toml | 3 +++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 97af837..fc99068 100644 --- a/README.md +++ b/README.md @@ -21,13 +21,13 @@ Pipelines to downscale ERA5 and CMIP6 data. Instructions using conda: -1. Clone this repository. +1. Clone this repository. Over ssh: ```sh git clone git@github.com:ihmeuw/climate-downscale.git ``` - + Over https: ```sh git clone https://github.com/ihmeuw/climate-downscale.git @@ -49,8 +49,8 @@ Instructions using conda: ### Pre-commit -Pre-commit hooks run all the auto-formatting (`ruff format`), linters -(e.g. `ruff` and `mypy`), and other quality checks to make sure the changeset is in +Pre-commit hooks run all the auto-formatting (`ruff format`), linters +(e.g. `ruff` and `mypy`), and other quality checks to make sure the changeset is in good shape before a commit/push happens. You can install the hooks with (runs for each commit): diff --git a/pyproject.toml b/pyproject.toml index 9464333..ee83dbf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,6 +65,9 @@ exclude = [ # If you need some ignores for certain modules, see tool.ruff.lint.per-file-ignores below. # For individual ignore cases, prefer inline `# noqa`s within the code. 
select = ["ALL"] +exclude = [ + "temperature", +] ignore = [ "COM812", # flake8 missing trailing comma, formatter handles "ISC001", # Implicit string concatenation From 1c8eac32b67d0715b28cd2ef67847a75cbd67dc8 Mon Sep 17 00:00:00 2001 From: collijk Date: Mon, 13 May 2024 10:40:57 -0700 Subject: [PATCH 3/4] Delete checkpoints --- .../lcz_extraction_testing-checkpoint.ipynb | 702 ------------------ .../lcz_to_table-checkpoint.ipynb | 6 - ...l_climate_zone_resampling-checkpoint.ipynb | 253 ------- .../cdsapi_run-checkpoint.ipynb | 75 -- .../cdsapi_run-checkpoint.py | 48 -- .../cdsapi_run_yearmonth-checkpoint.py | 46 -- .../download_era5_daily_data-checkpoint.ipynb | 6 - .../era5_data_hourly_to_daily-checkpoint.py | 36 - ...urly_to_daily_interactive-checkpoint.ipynb | 218 ------ .../parallel_hourly_to_daily-checkpoint.ipynb | 144 ---- .../unzip_era5_downloads-checkpoint.ipynb | 6 - 11 files changed, 1540 deletions(-) delete mode 100644 temperature/pre_launch_scripts/climate_zones/.ipynb_checkpoints/lcz_extraction_testing-checkpoint.ipynb delete mode 100644 temperature/pre_launch_scripts/climate_zones/.ipynb_checkpoints/lcz_to_table-checkpoint.ipynb delete mode 100644 temperature/pre_launch_scripts/climate_zones/.ipynb_checkpoints/local_climate_zone_resampling-checkpoint.ipynb delete mode 100644 temperature/pre_launch_scripts/era5_data/.ipynb_checkpoints/cdsapi_run-checkpoint.ipynb delete mode 100644 temperature/pre_launch_scripts/era5_data/.ipynb_checkpoints/cdsapi_run-checkpoint.py delete mode 100644 temperature/pre_launch_scripts/era5_data/.ipynb_checkpoints/cdsapi_run_yearmonth-checkpoint.py delete mode 100644 temperature/pre_launch_scripts/era5_data/.ipynb_checkpoints/download_era5_daily_data-checkpoint.ipynb delete mode 100644 temperature/pre_launch_scripts/era5_data/.ipynb_checkpoints/era5_data_hourly_to_daily-checkpoint.py delete mode 100644 temperature/pre_launch_scripts/era5_data/.ipynb_checkpoints/era5_data_hourly_to_daily_interactive-checkpoint.ipynb delete mode 100644 temperature/pre_launch_scripts/era5_data/.ipynb_checkpoints/parallel_hourly_to_daily-checkpoint.ipynb delete mode 100644 temperature/pre_launch_scripts/era5_data/.ipynb_checkpoints/unzip_era5_downloads-checkpoint.ipynb diff --git a/temperature/pre_launch_scripts/climate_zones/.ipynb_checkpoints/lcz_extraction_testing-checkpoint.ipynb b/temperature/pre_launch_scripts/climate_zones/.ipynb_checkpoints/lcz_extraction_testing-checkpoint.ipynb deleted file mode 100644 index 135c9d8..0000000 --- a/temperature/pre_launch_scripts/climate_zones/.ipynb_checkpoints/lcz_extraction_testing-checkpoint.ipynb +++ /dev/null @@ -1,702 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 14, - "id": "f96a9846-69b3-476c-a6e9-8c9711157570", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "import rasterio as reo\n", - "from rasterio.enums import Resampling\n", - "from rasterio.crs import CRS\n", - "import sklearn\n", - "import geopandas as gpd\n", - "from shapely.geometry import Point\n", - "import matplotlib.pyplot as plt\n", - "import rioxarray\n", - "import re\n", - "import os\n", - "import xarray as xr\n", - "import datetime as dt\n", - "from datetime import datetime\n", - "import argparse\n", - "import rioxarray\n", - "from rasterstats import point_query\n", - "from geopy.distance import geodesic" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "a6cfd793-e985-4d63-839d-822b58358120", - "metadata": { - "tags": [] - }, - 
"outputs": [], - "source": [ - "lcz_filepath = \"/ihme/homes/nhashmeh/downscale_temperature/climate_zones/lcz_filter_v2.tif\"\n", - "stations_path = '/ihme/homes/nhashmeh/downscale_temperature/global_summaries/'" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "15f0457b-3215-4f4f-b4bc-b9668b0b0029", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Loading in station data\n" - ] - } - ], - "source": [ - "# load in station data\n", - "print(\"Loading in station data\")\n", - "stations_filename = '1990_all_data.csv'\n", - "station_data = pd.read_csv(stations_path + stations_filename) \n", - "# break\n", - "\n", - "# processing station data...\n", - "station_data.columns = station_data.columns.str.lower()\n", - "station_data.drop(columns=\"station\", inplace=True) # don't need station numbers...\n", - "station_data.rename(columns={'date': 'time'}, inplace = True)\n", - "station_data['time'] = pd.to_datetime(station_data['time'])\n", - "station_data = station_data.dropna(how = 'any', subset = ['latitude', 'longitude', 'temp', 'elevation']) # drop rows where there are no coords (data isn't always clean...)\n", - "station_data['temp'] = (station_data['temp'] - 32) * 5/9 # convert to C\n", - "\n", - "grouped_stations = station_data.groupby(station_data['time'].dt.month)\n", - "\n", - "group_1 = grouped_stations.get_group(1)\n", - "\n", - "gdf_station = gpd.GeoDataFrame(group_1, geometry=gpd.points_from_xy(group_1.longitude, group_1.latitude))" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "6a157160-082f-4214-8b5a-2610648a103e", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
[Jupyter HTML table render elided during extraction — 215501 rows × 6 columns; the same dataframe appears in the text/plain output below]
" - ], - "text/plain": [ - " time latitude longitude elevation temp \\\n", - "0 1990-01-01 46.000000 26.133333 569.0 -8.888889 \n", - "1 1990-01-02 46.000000 26.133333 569.0 -8.222222 \n", - "2 1990-01-03 46.000000 26.133333 569.0 -8.000000 \n", - "3 1990-01-04 46.000000 26.133333 569.0 -11.444444 \n", - "4 1990-01-05 46.000000 26.133333 569.0 -19.555556 \n", - "... ... ... ... ... ... \n", - "2615806 1990-01-27 14.916667 -92.250000 118.0 25.444444 \n", - "2615807 1990-01-28 14.916667 -92.250000 118.0 26.444444 \n", - "2615808 1990-01-29 14.916667 -92.250000 118.0 26.777778 \n", - "2615809 1990-01-30 14.916667 -92.250000 118.0 25.000000 \n", - "2615810 1990-01-31 14.916667 -92.250000 118.0 26.055556 \n", - "\n", - " geometry \n", - "0 POINT (26.13333 46.00000) \n", - "1 POINT (26.13333 46.00000) \n", - "2 POINT (26.13333 46.00000) \n", - "3 POINT (26.13333 46.00000) \n", - "4 POINT (26.13333 46.00000) \n", - "... ... \n", - "2615806 POINT (-92.25000 14.91667) \n", - "2615807 POINT (-92.25000 14.91667) \n", - "2615808 POINT (-92.25000 14.91667) \n", - "2615809 POINT (-92.25000 14.91667) \n", - "2615810 POINT (-92.25000 14.91667) \n", - "\n", - "[215501 rows x 6 columns]" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "gdf_station" - ] - }, - { - "cell_type": "code", - "execution_count": 99, - "id": "92547408-ce77-4099-85d4-00fea7a95aed", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "02:47:10 PM\n" - ] - } - ], - "source": [ - "now = datetime.now()\n", - "current_time = now.strftime(\"%I:%M:%S %p\")\n", - "print(current_time)\n", - "# Open your raster file\n", - "with reo.open(lcz_filepath) as src:\n", - " # Transpose your coordinate pairs and sample from the raster\n", - " raster_values = [value for value in src.sample(np.transpose((gdf_station.geometry.x, gdf_station.geometry.y)))]\n", - " raster_lat = [value for value in src.sample(np.transpose((gdf_station.geometry.x, gdf_station.geometry.y)))]\n", - " # Add the raster values to your dataframe\n", - " gdf_station['band_1'] = raster_values\n", - " gdf_station['band_1'] = gdf_station['band_1'].apply(lambda x: x[0])" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "c858ce51-2d49-4eab-af04-75cf738e1c23", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "03:33:53 PM\n" - ] - } - ], - "source": [ - "now = datetime.now()\n", - "current_time = now.strftime(\"%I:%M:%S %p\")\n", - "print(current_time)\n", - "# Open your raster file\n", - "with reo.open(lcz_filepath) as src:\n", - " # Get geographic coordinates\n", - " coords = np.transpose((gdf_station.geometry.x, gdf_station.geometry.y))\n", - "\n", - " # Transpose your coordinate pairs, sample from the raster, and get raster coordinates\n", - " raster_values_and_coords = [(value, src.xy(*src.index(*coord))) for coord, value in zip(coords, src.sample(coords))]\n", - "\n", - " # Separate values and coordinates\n", - " raster_values = [value[0] for value in raster_values_and_coords]\n", - " raster_coords = [coord for value, coord in raster_values_and_coords]\n", - "\n", - " # Add the raster values and coordinates to your dataframe\n", - " gdf_station['raster_value'] = raster_values\n", - " gdf_station['raster_coords'] = raster_coords\n", - " # Split 'raster_coords' into 'latitude' and 'longitude'\n", - " gdf_station[['lcz_longitude', 'lcz_latitude']] = 
gdf_station['raster_coords'].apply(pd.Series)" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "321411e8-34e0-4945-a434-a6394f8c8384", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "ename": "KeyboardInterrupt", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[17], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Use apply to calculate the distance between each pair of coordinates\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m gdf_station[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlcz_distance_meters\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[43mgdf_station\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43;01mlambda\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mrow\u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mgeodesic\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrow\u001b[49m\u001b[43m[\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mlatitude\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mlongitude\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrow\u001b[49m\u001b[43m[\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mlcz_latitude\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mlcz_longitude\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmeters\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniconda3/envs/earth-analytics-python/lib/python3.8/site-packages/geopandas/geodataframe.py:1581\u001b[0m, in \u001b[0;36mGeoDataFrame.apply\u001b[0;34m(self, func, axis, raw, result_type, args, **kwargs)\u001b[0m\n\u001b[1;32m 1579\u001b[0m \u001b[38;5;129m@doc\u001b[39m(pd\u001b[38;5;241m.\u001b[39mDataFrame)\n\u001b[1;32m 1580\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mapply\u001b[39m(\u001b[38;5;28mself\u001b[39m, func, axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m, raw\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m, result_type\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, args\u001b[38;5;241m=\u001b[39m(), \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m-> 1581\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1582\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mraw\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mraw\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mresult_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresult_type\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\n\u001b[1;32m 1583\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1584\u001b[0m \u001b[38;5;66;03m# pandas <1.4 re-attach last geometry col if lost\u001b[39;00m\n\u001b[1;32m 1585\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 1586\u001b[0m \u001b[38;5;129;01mnot\u001b[39;00m compat\u001b[38;5;241m.\u001b[39mPANDAS_GE_14\n\u001b[1;32m 1587\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(result, GeoDataFrame)\n\u001b[1;32m 1588\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m result\u001b[38;5;241m.\u001b[39m_geometry_column_name \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1589\u001b[0m ):\n", - "File \u001b[0;32m~/miniconda3/envs/earth-analytics-python/lib/python3.8/site-packages/pandas/core/frame.py:9568\u001b[0m, in \u001b[0;36mDataFrame.apply\u001b[0;34m(self, func, axis, raw, result_type, args, **kwargs)\u001b[0m\n\u001b[1;32m 9557\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mapply\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m frame_apply\n\u001b[1;32m 9559\u001b[0m op \u001b[38;5;241m=\u001b[39m frame_apply(\n\u001b[1;32m 9560\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 9561\u001b[0m func\u001b[38;5;241m=\u001b[39mfunc,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 9566\u001b[0m kwargs\u001b[38;5;241m=\u001b[39mkwargs,\n\u001b[1;32m 9567\u001b[0m )\n\u001b[0;32m-> 9568\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mop\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39m__finalize__(\u001b[38;5;28mself\u001b[39m, method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mapply\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", - "File \u001b[0;32m~/miniconda3/envs/earth-analytics-python/lib/python3.8/site-packages/pandas/core/apply.py:764\u001b[0m, in \u001b[0;36mFrameApply.apply\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 761\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mraw:\n\u001b[1;32m 762\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mapply_raw()\n\u001b[0;32m--> 764\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply_standard\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniconda3/envs/earth-analytics-python/lib/python3.8/site-packages/pandas/core/apply.py:891\u001b[0m, in \u001b[0;36mFrameApply.apply_standard\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 890\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mapply_standard\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[0;32m--> 891\u001b[0m results, res_index \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply_series_generator\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 893\u001b[0m \u001b[38;5;66;03m# 
wrap results\u001b[39;00m\n\u001b[1;32m 894\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mwrap_results(results, res_index)\n", - "File \u001b[0;32m~/miniconda3/envs/earth-analytics-python/lib/python3.8/site-packages/pandas/core/apply.py:907\u001b[0m, in \u001b[0;36mFrameApply.apply_series_generator\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 904\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m option_context(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmode.chained_assignment\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[1;32m 905\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i, v \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(series_gen):\n\u001b[1;32m 906\u001b[0m \u001b[38;5;66;03m# ignore SettingWithCopy here in case the user mutates\u001b[39;00m\n\u001b[0;32m--> 907\u001b[0m results[i] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mf\u001b[49m\u001b[43m(\u001b[49m\u001b[43mv\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 908\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(results[i], ABCSeries):\n\u001b[1;32m 909\u001b[0m \u001b[38;5;66;03m# If we have a view on v, we need to make a copy because\u001b[39;00m\n\u001b[1;32m 910\u001b[0m \u001b[38;5;66;03m# series_generator will swap out the underlying data\u001b[39;00m\n\u001b[1;32m 911\u001b[0m results[i] \u001b[38;5;241m=\u001b[39m results[i]\u001b[38;5;241m.\u001b[39mcopy(deep\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n", - "Cell \u001b[0;32mIn[17], line 2\u001b[0m, in \u001b[0;36m\u001b[0;34m(row)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Use apply to calculate the distance between each pair of coordinates\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m gdf_station[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlcz_distance_meters\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m gdf_station\u001b[38;5;241m.\u001b[39mapply(\u001b[38;5;28;01mlambda\u001b[39;00m row: geodesic(row[[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlatitude\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlongitude\u001b[39m\u001b[38;5;124m'\u001b[39m]], \u001b[43mrow\u001b[49m\u001b[43m[\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mlcz_latitude\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mlcz_longitude\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m]\u001b[49m)\u001b[38;5;241m.\u001b[39mmeters, axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m)\n", - "File \u001b[0;32m~/miniconda3/envs/earth-analytics-python/lib/python3.8/site-packages/pandas/core/series.py:1007\u001b[0m, in \u001b[0;36mSeries.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 1004\u001b[0m key \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39masarray(key, dtype\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mbool\u001b[39m)\n\u001b[1;32m 1005\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_values(key)\n\u001b[0;32m-> 1007\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_with\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n", - "File 
\u001b[0;32m~/miniconda3/envs/earth-analytics-python/lib/python3.8/site-packages/pandas/core/series.py:1047\u001b[0m, in \u001b[0;36mSeries._get_with\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 1044\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39miloc[key]\n\u001b[1;32m 1046\u001b[0m \u001b[38;5;66;03m# handle the dup indexing case GH#4246\u001b[39;00m\n\u001b[0;32m-> 1047\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mloc\u001b[49m\u001b[43m[\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m]\u001b[49m\n", - "File \u001b[0;32m~/miniconda3/envs/earth-analytics-python/lib/python3.8/site-packages/pandas/core/indexing.py:1073\u001b[0m, in \u001b[0;36m_LocationIndexer.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 1070\u001b[0m axis \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maxis \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m 1072\u001b[0m maybe_callable \u001b[38;5;241m=\u001b[39m com\u001b[38;5;241m.\u001b[39mapply_if_callable(key, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobj)\n\u001b[0;32m-> 1073\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_getitem_axis\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmaybe_callable\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maxis\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniconda3/envs/earth-analytics-python/lib/python3.8/site-packages/pandas/core/indexing.py:1301\u001b[0m, in \u001b[0;36m_LocIndexer._getitem_axis\u001b[0;34m(self, key, axis)\u001b[0m\n\u001b[1;32m 1298\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(key, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mndim\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m key\u001b[38;5;241m.\u001b[39mndim \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[1;32m 1299\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot index with multidimensional key\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m-> 1301\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_getitem_iterable\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maxis\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1303\u001b[0m \u001b[38;5;66;03m# nested tuple slicing\u001b[39;00m\n\u001b[1;32m 1304\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_nested_tuple(key, labels):\n", - "File \u001b[0;32m~/miniconda3/envs/earth-analytics-python/lib/python3.8/site-packages/pandas/core/indexing.py:1240\u001b[0m, in \u001b[0;36m_LocIndexer._getitem_iterable\u001b[0;34m(self, key, axis)\u001b[0m\n\u001b[1;32m 1238\u001b[0m \u001b[38;5;66;03m# A collection of keys\u001b[39;00m\n\u001b[1;32m 1239\u001b[0m keyarr, indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_listlike_indexer(key, axis)\n\u001b[0;32m-> 1240\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mobj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_reindex_with_indexers\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1241\u001b[0m \u001b[43m \u001b[49m\u001b[43m{\u001b[49m\u001b[43maxis\u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[43mkeyarr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindexer\u001b[49m\u001b[43m]\u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mallow_dups\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\n\u001b[1;32m 1242\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniconda3/envs/earth-analytics-python/lib/python3.8/site-packages/pandas/core/generic.py:5355\u001b[0m, in \u001b[0;36mNDFrame._reindex_with_indexers\u001b[0;34m(self, reindexers, fill_value, copy, allow_dups)\u001b[0m\n\u001b[1;32m 5352\u001b[0m indexer \u001b[38;5;241m=\u001b[39m ensure_platform_int(indexer)\n\u001b[1;32m 5354\u001b[0m \u001b[38;5;66;03m# TODO: speed up on homogeneous DataFrame objects (see _reindex_multi)\u001b[39;00m\n\u001b[0;32m-> 5355\u001b[0m new_data \u001b[38;5;241m=\u001b[39m \u001b[43mnew_data\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreindex_indexer\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 5356\u001b[0m \u001b[43m \u001b[49m\u001b[43mindex\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 5357\u001b[0m \u001b[43m \u001b[49m\u001b[43mindexer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 5358\u001b[0m \u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbaxis\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 5359\u001b[0m \u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfill_value\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 5360\u001b[0m \u001b[43m \u001b[49m\u001b[43mallow_dups\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mallow_dups\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 5361\u001b[0m \u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 5362\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 5363\u001b[0m \u001b[38;5;66;03m# If we've made a copy once, no need to make another one\u001b[39;00m\n\u001b[1;32m 5364\u001b[0m copy \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n", - "File \u001b[0;32m~/miniconda3/envs/earth-analytics-python/lib/python3.8/site-packages/pandas/core/internals/managers.py:743\u001b[0m, in \u001b[0;36mBaseBlockManager.reindex_indexer\u001b[0;34m(self, new_axis, indexer, axis, fill_value, allow_dups, copy, only_slice, use_na_proxy)\u001b[0m\n\u001b[1;32m 740\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mIndexError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRequested axis not found in manager\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 742\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m axis \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[0;32m--> 743\u001b[0m new_blocks, new_refs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_slice_take_blocks_ax0\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 744\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mindexer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 745\u001b[0m \u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfill_value\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 746\u001b[0m \u001b[43m \u001b[49m\u001b[43monly_slice\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43monly_slice\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 747\u001b[0m \u001b[43m \u001b[49m\u001b[43muse_na_proxy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muse_na_proxy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 748\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 749\u001b[0m parent \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01mif\u001b[39;00m com\u001b[38;5;241m.\u001b[39mall_none(\u001b[38;5;241m*\u001b[39mnew_refs) \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\n\u001b[1;32m 750\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", - "File \u001b[0;32m~/miniconda3/envs/earth-analytics-python/lib/python3.8/site-packages/pandas/core/internals/managers.py:803\u001b[0m, in \u001b[0;36mBaseBlockManager._slice_take_blocks_ax0\u001b[0;34m(self, slice_or_indexer, fill_value, only_slice, use_na_proxy)\u001b[0m\n\u001b[1;32m 782\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 783\u001b[0m \u001b[38;5;124;03mSlice/take blocks along axis=0.\u001b[39;00m\n\u001b[1;32m 784\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 799\u001b[0m \u001b[38;5;124;03mnew_blocks : list of Block\u001b[39;00m\n\u001b[1;32m 800\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 801\u001b[0m allow_fill \u001b[38;5;241m=\u001b[39m fill_value \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m lib\u001b[38;5;241m.\u001b[39mno_default\n\u001b[0;32m--> 803\u001b[0m sl_type, slobj, sllen \u001b[38;5;241m=\u001b[39m \u001b[43m_preprocess_slice_or_indexer\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 804\u001b[0m \u001b[43m \u001b[49m\u001b[43mslice_or_indexer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mshape\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mallow_fill\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mallow_fill\u001b[49m\n\u001b[1;32m 805\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 807\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mis_single_block:\n\u001b[1;32m 808\u001b[0m blk \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mblocks[\u001b[38;5;241m0\u001b[39m]\n", - "File \u001b[0;32m~/miniconda3/envs/earth-analytics-python/lib/python3.8/site-packages/pandas/core/internals/managers.py:2410\u001b[0m, in \u001b[0;36m_preprocess_slice_or_indexer\u001b[0;34m(slice_or_indexer, length, allow_fill)\u001b[0m\n\u001b[1;32m 2407\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_preprocess_slice_or_indexer\u001b[39m(\n\u001b[1;32m 2408\u001b[0m slice_or_indexer: \u001b[38;5;28mslice\u001b[39m \u001b[38;5;241m|\u001b[39m np\u001b[38;5;241m.\u001b[39mndarray, length: \u001b[38;5;28mint\u001b[39m, allow_fill: \u001b[38;5;28mbool\u001b[39m\n\u001b[1;32m 2409\u001b[0m ):\n\u001b[0;32m-> 2410\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m 
\u001b[38;5;28;43misinstance\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mslice_or_indexer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mslice\u001b[39;49m\u001b[43m)\u001b[49m:\n\u001b[1;32m 2411\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m (\n\u001b[1;32m 2412\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mslice\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 2413\u001b[0m slice_or_indexer,\n\u001b[1;32m 2414\u001b[0m libinternals\u001b[38;5;241m.\u001b[39mslice_len(slice_or_indexer, length),\n\u001b[1;32m 2415\u001b[0m )\n\u001b[1;32m 2416\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " - ] - } - ], - "source": [ - "# Use apply to calculate the distance between each pair of coordinates\n", - "gdf_station['lcz_distance_meters'] = gdf_station.apply(lambda row: geodesic(row[['latitude', 'longitude']], row[['lcz_latitude', 'lcz_longitude']]).meters, axis=1)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "03a0198b-6fe6-4d47-8fd0-39bf451d6f48", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
[Jupyter HTML table render elided during extraction — 215501 rows × 10 columns; the same dataframe appears in the text/plain output below]
" - ], - "text/plain": [ - " time latitude longitude elevation temp \\\n", - "0 1990-01-01 45.999582 26.133339 569.0 -8.888889 \n", - "1 1990-01-02 45.999582 26.133339 569.0 -8.222222 \n", - "2 1990-01-03 45.999582 26.133339 569.0 -8.000000 \n", - "3 1990-01-04 45.999582 26.133339 569.0 -11.444444 \n", - "4 1990-01-05 45.999582 26.133339 569.0 -19.555556 \n", - "... ... ... ... ... ... \n", - "2615806 1990-01-27 14.916974 -92.250242 118.0 25.444444 \n", - "2615807 1990-01-28 14.916974 -92.250242 118.0 26.444444 \n", - "2615808 1990-01-29 14.916974 -92.250242 118.0 26.777778 \n", - "2615809 1990-01-30 14.916974 -92.250242 118.0 25.000000 \n", - "2615810 1990-01-31 14.916974 -92.250242 118.0 26.055556 \n", - "\n", - " geometry raster_value \\\n", - "0 POINT (26.13333 46.00000) [6] \n", - "1 POINT (26.13333 46.00000) [6] \n", - "2 POINT (26.13333 46.00000) [6] \n", - "3 POINT (26.13333 46.00000) [6] \n", - "4 POINT (26.13333 46.00000) [6] \n", - "... ... ... \n", - "2615806 POINT (-92.25000 14.91667) [6] \n", - "2615807 POINT (-92.25000 14.91667) [6] \n", - "2615808 POINT (-92.25000 14.91667) [6] \n", - "2615809 POINT (-92.25000 14.91667) [6] \n", - "2615810 POINT (-92.25000 14.91667) [6] \n", - "\n", - " raster_coords lcz_longitude lcz_latitude \n", - "0 (26.13333908796227, 45.999581596266424) 26.133339 45.999582 \n", - "1 (26.13333908796227, 45.999581596266424) 26.133339 45.999582 \n", - "2 (26.13333908796227, 45.999581596266424) 26.133339 45.999582 \n", - "3 (26.13333908796227, 45.999581596266424) 26.133339 45.999582 \n", - "4 (26.13333908796227, 45.999581596266424) 26.133339 45.999582 \n", - "... ... ... ... \n", - "2615806 (-92.25024231444428, 14.916974450446986) -92.250242 14.916974 \n", - "2615807 (-92.25024231444428, 14.916974450446986) -92.250242 14.916974 \n", - "2615808 (-92.25024231444428, 14.916974450446986) -92.250242 14.916974 \n", - "2615809 (-92.25024231444428, 14.916974450446986) -92.250242 14.916974 \n", - "2615810 (-92.25024231444428, 14.916974450446986) -92.250242 14.916974 \n", - "\n", - "[215501 rows x 10 columns]" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "gdf_station" - ] - }, - { - "cell_type": "code", - "execution_count": 102, - "id": "a045e6d0-526b-4c9e-bdc7-35c73924d2f0", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "14 77370\n", - "0 22604\n", - "16 21102\n", - "6 20605\n", - "11 20055\n", - "8 12550\n", - "12 10803\n", - "9 10747\n", - "17 5972\n", - "15 3600\n", - "3 3401\n", - "5 1982\n", - "2 1927\n", - "13 1684\n", - "4 610\n", - "1 237\n", - "7 169\n", - "10 83\n", - "Name: band_1, dtype: int64" - ] - }, - "execution_count": 102, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "gdf_station['band_1'].value_counts()" - ] - }, - { - "cell_type": "code", - "execution_count": 79, - "id": "1836bf3c-33fb-414a-ba05-983b01a0f41a", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "array(8, dtype=uint8)" - ] - }, - "execution_count": 79, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "da.band_1.sel(x=\"-122.329038\", y=\"37.808930\", method='nearest').values" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "earth-analytics-python", - "language": "python", - "name": "earth-analytics-python" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": 
"python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.17" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/temperature/pre_launch_scripts/climate_zones/.ipynb_checkpoints/lcz_to_table-checkpoint.ipynb b/temperature/pre_launch_scripts/climate_zones/.ipynb_checkpoints/lcz_to_table-checkpoint.ipynb deleted file mode 100644 index 363fcab..0000000 --- a/temperature/pre_launch_scripts/climate_zones/.ipynb_checkpoints/lcz_to_table-checkpoint.ipynb +++ /dev/null @@ -1,6 +0,0 @@ -{ - "cells": [], - "metadata": {}, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/temperature/pre_launch_scripts/climate_zones/.ipynb_checkpoints/local_climate_zone_resampling-checkpoint.ipynb b/temperature/pre_launch_scripts/climate_zones/.ipynb_checkpoints/local_climate_zone_resampling-checkpoint.ipynb deleted file mode 100644 index f758ff8..0000000 --- a/temperature/pre_launch_scripts/climate_zones/.ipynb_checkpoints/local_climate_zone_resampling-checkpoint.ipynb +++ /dev/null @@ -1,253 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "41462762-5af1-4511-9eb7-3d32ab56ecbb", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/ihme/homes/nhashmeh/miniconda3/envs/earth-analytics-python/lib/python3.8/site-packages/pyproj/__init__.py:89: UserWarning: pyproj unable to set database path.\n", - " _pyproj_global_context_initialize()\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "import rasterio as reo\n", - "from rasterio.enums import Resampling\n", - "from rasterio.crs import CRS\n", - "import sklearn\n", - "# from osgeo import gdal\n", - "import geopandas\n", - "import matplotlib.pyplot as plt\n", - "import rioxarray\n", - "import re" - ] - }, - { - "cell_type": "markdown", - "id": "5379b289-a660-4729-a317-7f9fbd0c4b47", - "metadata": { - "tags": [] - }, - "source": [ - "## RASTERIO TESTING" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "7523837a-a15a-43c0-b631-912dd84761dd", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "path = \"/ihme/homes/nhashmeh/downscale_temperature/climate_zones/\"\n", - "cz_raster = \"/ihme/homes/nhashmeh/downscale_temperature/climate_zones/lcz_filter_v2.tif\"\n", - "reo_raster = \"/ihme/homes/nhashmeh/downscale_temperature/climate_zones/rasterio_test.tif\"" - ] - }, - { - "cell_type": "markdown", - "id": "fef1a24e-1a42-4483-9e01-5fdc811cb855", - "metadata": {}, - "source": [ - "# inspect data" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "afdb4559-7817-4b87-9b45-2f0815227145", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "ERROR 1: PROJ: proj_identify: /ihme/homes/nhashmeh/miniconda3/share/proj/proj.db lacks DATABASE.LAYOUT.VERSION.MAJOR / DATABASE.LAYOUT.VERSION.MINOR metadata. 
It comes from another PROJ installation.\n" - ] - }, - { - "data": { - "text/plain": [ - "{'driver': 'GTiff',\n", - " 'dtype': 'uint8',\n", - " 'nodata': None,\n", - " 'width': 38962,\n", - " 'height': 15599,\n", - " 'count': 1,\n", - " 'crs': CRS.from_wkt('GEOGCS[\"WGS 84\",DATUM[\"World Geodetic System 1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST]]'),\n", - " 'transform': Affine(0.008983152841195179, 0.0, -170.00077762791474,\n", - " 0.0, -0.008983440781218295, 80.03809518448114)}" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# load in data\n", - "testing = reo.open(\"/ihme/homes/nhashmeh/downscale_temperature/climate_zones/lcz_resampled_mode.tif\")\n", - "testing.meta" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "7df0c5ee-ab28-4354-acf0-c0f1141276dd", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'testing' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[1], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m array \u001b[38;5;241m=\u001b[39m \u001b[43mtesting\u001b[49m\u001b[38;5;241m.\u001b[39mread(\u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m 2\u001b[0m np\u001b[38;5;241m.\u001b[39munique(array)\n", - "\u001b[0;31mNameError\u001b[0m: name 'testing' is not defined" - ] - } - ], - "source": [ - "array = testing.read(1)\n", - "np.unique(array)" - ] - }, - { - "cell_type": "markdown", - "id": "629846df-e9e6-41a6-9709-912d8f091c90", - "metadata": {}, - "source": [ - "# resampling" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "ae1f607a-a066-4271-b4e4-d347878df9da", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Shape before resample: (155995, 389620)\n", - "Shape after resample: (15599, 38962)\n", - "Transform before resample:\n", - " | 0.00, 0.00,-170.00|\n", - "| 0.00,-0.00, 80.04|\n", - "| 0.00, 0.00, 1.00| \n", - "\n", - "Transform after resample:\n", - " | 0.01, 0.00,-170.00|\n", - "| 0.00,-0.01, 80.04|\n", - "| 0.00, 0.00, 1.00|\n", - "(15599, 38962)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "ERROR 1: PROJ: proj_create_from_name: /ihme/homes/nhashmeh/miniconda3/share/proj/proj.db lacks DATABASE.LAYOUT.VERSION.MAJOR / DATABASE.LAYOUT.VERSION.MINOR metadata. 
It comes from another PROJ installation.\n" - ] - } - ], - "source": [ - "# load in data\n", - "og_data = reo.open(\"/ihme/homes/nhashmeh/downscale_temperature/climate_zones/lcz_filter_v2.tif\")\n", - "\n", - "# factor for rescaling (100m -> 1000m)\n", - "downscale_factor = 1/10\n", - "\n", - "with reo.open(\"/ihme/homes/nhashmeh/downscale_temperature/climate_zones/lcz_filter_v2.tif\", crs='EPSG:4326') as dataset:\n", - "\n", - " # resample data to target shape\n", - " data = dataset.read(\n", - " out_shape=(\n", - " dataset.count,\n", - " int(dataset.height * downscale_factor), # multiply height by downscale factor\n", - " int(dataset.width * downscale_factor) # multiply widrh by downscale factor\n", - " ),\n", - " resampling=Resampling.mode # resampling method\n", - " )\n", - "\n", - " # scale image transform\n", - " new = dataset.transform * dataset.transform.scale(\n", - " (dataset.width / data.shape[-1]),\n", - " (dataset.height / data.shape[-2])\n", - " )\n", - "\n", - "print('Shape before resample:', og_data.shape)\n", - "print('Shape after resample:', data.shape[1:])\n", - "print('Transform before resample:\\n', dataset.transform, '\\n')\n", - "print('Transform after resample:\\n', new)\n", - "\n", - "new_dataset = reo.open(\n", - " '/ihme/homes/nhashmeh/downscale_temperature/climate_zones/lcz_resampled_mode.tif',\n", - " 'w',\n", - " driver='GTiff',\n", - " height=data.shape[1],\n", - " width=data.shape[2],\n", - " count=1,\n", - " dtype=data.dtype,\n", - " crs=dataset.crs,\n", - " # crs='EPSG:4326',\n", - " # crs=CRS.from_user_input('EPSG:4326'),\n", - " transform=new,\n", - ")\n", - "\n", - "print(new_dataset.shape)\n", - "\n", - "new_dataset.write(data)\n", - "new_dataset.close()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bdfda352-135d-48cf-b9a6-e460cda59b0e", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "earth-analytics-python", - "language": "python", - "name": "earth-analytics-python" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.17" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/temperature/pre_launch_scripts/era5_data/.ipynb_checkpoints/cdsapi_run-checkpoint.ipynb b/temperature/pre_launch_scripts/era5_data/.ipynb_checkpoints/cdsapi_run-checkpoint.ipynb deleted file mode 100644 index 9192b4c..0000000 --- a/temperature/pre_launch_scripts/era5_data/.ipynb_checkpoints/cdsapi_run-checkpoint.ipynb +++ /dev/null @@ -1,75 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "ca73c4c6-f39a-4dbd-8904-4fb92e2b346a", - "metadata": {}, - "outputs": [], - "source": [ - "import cdsapi\n", - "\n", - "c = cdsapi.Client()\n", - "\n", - "c.retrieve(\n", - " 'reanalysis-era5-land',\n", - " {\n", - " 'variable': '2m_temperature',\n", - " 'year': i,\n", - " 'month': [\n", - " '01', '02', '03',\n", - " '04', '05', '06',\n", - " '07', '08', '09',\n", - " '10', '11', '12',\n", - " ],\n", - " 'day': [\n", - " '01', '02', '03',\n", - " '04', '05', '06',\n", - " '07', '08', '09',\n", - " '10', '11', '12',\n", - " '13', '14', '15',\n", - " '16', '17', '18',\n", - " '19', '20', '21',\n", - " '22', '23', '24',\n", - " '25', '26', '27',\n", - " '28', '29', '30',\n", - " '31',\n", - " ],\n", - " 'format': 'netcdf.zip',\n", - " 'time': [\n", - " '00:00', 
'01:00', '02:00',\n", - " '03:00', '04:00', '05:00',\n", - " '06:00', '07:00', '08:00',\n", - " '09:00', '10:00', '11:00',\n", - " '12:00', '13:00', '14:00',\n", - " '15:00', '16:00', '17:00',\n", - " '18:00', '19:00', '20:00',\n", - " '21:00', '22:00', '23:00',\n", - " ],\n", - " },\n", - " '/ihme/homes/nhashmeh/downscale_temperature/era5_data/1990_download.netcdf.zip')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "earth-analytics-python", - "language": "python", - "name": "earth-analytics-python" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.17" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/temperature/pre_launch_scripts/era5_data/.ipynb_checkpoints/cdsapi_run-checkpoint.py b/temperature/pre_launch_scripts/era5_data/.ipynb_checkpoints/cdsapi_run-checkpoint.py deleted file mode 100644 index 08639a6..0000000 --- a/temperature/pre_launch_scripts/era5_data/.ipynb_checkpoints/cdsapi_run-checkpoint.py +++ /dev/null @@ -1,48 +0,0 @@ -import cdsapi -import argparse - -parser = argparse.ArgumentParser(description='Year input') -parser.add_argument('year', metavar='Y', type=str, - help='the year of interest as a string') -args = parser.parse_args() - -c = cdsapi.Client() -print(args) -c.retrieve( - 'reanalysis-era5-land', - { - 'variable': '2m_temperature', - 'year': args.year, - 'month': [ - '01', '02', '03', - '04', '05', '06', - '07', '08', '09', - '10', '11', '12', - ], - 'day': [ - '01', '02', '03', - '04', '05', '06', - '07', '08', '09', - '10', '11', '12', - '13', '14', '15', - '16', '17', '18', - '19', '20', '21', - '22', '23', '24', - '25', '26', '27', - '28', '29', '30', - '31', - ], - 'format': 'netcdf.zip', - # 'format': 'grib', - 'time': [ - '00:00', - '03:00', - '06:00', - '09:00', - '12:00', - '15:00', - '18:00', - '21:00', - ], - }, - '/mnt/share/erf/ERA5/three_hourly_temp_9km/{}_download.zip'.format(args.year)) \ No newline at end of file diff --git a/temperature/pre_launch_scripts/era5_data/.ipynb_checkpoints/cdsapi_run_yearmonth-checkpoint.py b/temperature/pre_launch_scripts/era5_data/.ipynb_checkpoints/cdsapi_run_yearmonth-checkpoint.py deleted file mode 100644 index 5742a25..0000000 --- a/temperature/pre_launch_scripts/era5_data/.ipynb_checkpoints/cdsapi_run_yearmonth-checkpoint.py +++ /dev/null @@ -1,46 +0,0 @@ -import cdsapi -import argparse -import subprocess - -parser = argparse.ArgumentParser(description='Year input') -parser.add_argument('year', metavar='Y', type=str, - help='the year of interest as an integer') -parser.add_argument('month', metavar='M', type=str, - help='the month of interest as an integer') - -args = parser.parse_args() - -c = cdsapi.Client() -print(args) -c.retrieve( - 'reanalysis-era5-land', - { - 'variable': '2m_temperature', - 'year': args.year, - 'month': args.month, - 'day': [ - '01', '02', '03', - '04', '05', '06', - '07', '08', '09', - '10', '11', '12', - '13', '14', '15', - '16', '17', '18', - '19', '20', '21', - '22', '23', '24', - '25', '26', '27', - '28', '29', '30', - '31', - ], - 'format': 'netcdf.zip', - 'time': [ - '00:00', '01:00', '02:00', - '03:00', '04:00', '05:00', - '06:00', '07:00', '08:00', - '09:00', '10:00', '11:00', - '12:00', '13:00', '14:00', - '15:00', '16:00', '17:00', - '18:00', '19:00', '20:00', - '21:00', '22:00', '23:00', - ], - }, - 
'/mnt/share/erf/ERA5/hourly_temp_9km/{}_{}_download.netcdf.zip'.format(int(args.year), int(args.month))) \ No newline at end of file diff --git a/temperature/pre_launch_scripts/era5_data/.ipynb_checkpoints/download_era5_daily_data-checkpoint.ipynb b/temperature/pre_launch_scripts/era5_data/.ipynb_checkpoints/download_era5_daily_data-checkpoint.ipynb deleted file mode 100644 index 363fcab..0000000 --- a/temperature/pre_launch_scripts/era5_data/.ipynb_checkpoints/download_era5_daily_data-checkpoint.ipynb +++ /dev/null @@ -1,6 +0,0 @@ -{ - "cells": [], - "metadata": {}, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/temperature/pre_launch_scripts/era5_data/.ipynb_checkpoints/era5_data_hourly_to_daily-checkpoint.py b/temperature/pre_launch_scripts/era5_data/.ipynb_checkpoints/era5_data_hourly_to_daily-checkpoint.py deleted file mode 100644 index 73306e2..0000000 --- a/temperature/pre_launch_scripts/era5_data/.ipynb_checkpoints/era5_data_hourly_to_daily-checkpoint.py +++ /dev/null @@ -1,36 +0,0 @@ -import numpy as np -import pandas as pd -import rasterio as reo -from rasterio.enums import Resampling -from rasterio.crs import CRS -import sklearn -import geopandas -import matplotlib.pyplot as plt -import rioxarray -import re -import os -import xarray as xr -import datetime as dt -from datetime import datetime -import argparse -import getpass -import subprocess - -# Parse arguments -parser = argparse.ArgumentParser() -parser.add_argument('file', type=str, help='filename') - -args = parser.parse_args() - -file = args.file - -path_to_files = '/mnt/share/erf/ERA5/era5_precip/downloaded_unzipped/' -output_path = '/mnt/share/erf/ERA5/era5_precip/daily_precip/' - -get_name = file.split(".")[0] -new_filename = get_name + "_daily.nc" -data = xr.open_dataset(path_to_files + file) # loads in data -print("Calculating daily averages from hourly data for " + get_name) -daily_data = data.resample(time='D').mean() # calculates daily averages -print("Saving new daily averages as " + new_filename) -daily_data.to_netcdf(output_path + new_filename) \ No newline at end of file diff --git a/temperature/pre_launch_scripts/era5_data/.ipynb_checkpoints/era5_data_hourly_to_daily_interactive-checkpoint.ipynb b/temperature/pre_launch_scripts/era5_data/.ipynb_checkpoints/era5_data_hourly_to_daily_interactive-checkpoint.ipynb deleted file mode 100644 index 7c16d68..0000000 --- a/temperature/pre_launch_scripts/era5_data/.ipynb_checkpoints/era5_data_hourly_to_daily_interactive-checkpoint.ipynb +++ /dev/null @@ -1,218 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "22488016-510e-4674-ba97-9076c1ad3031", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "from scipy.io import netcdf_file\n", - "import numpy as np\n", - "import zipfile\n", - "import subprocess\n", - "import xarray as xr\n", - "import os\n", - "import rasterio" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "d3652fc6-3a6a-4a86-915e-f66a00d686a7", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# Variables\n", - "\n", - "filename = 'data.nc'\n", - "path_to_files = '/mnt/share/erf/ERA5/three_hourly_temp_9km/downloaded_unzipped/'\n", - "output_path = '/mnt/share/erf/ERA5/three_hourly_temp_9km/daily_temps/'" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "e48a75c9-ff1e-4774-8236-d916ab64280d", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Calculating daily averages from hourly 
data for 2011_era5_temp\n", - "Saving new daily averages as 2011_era5_temp_daily.nc\n", - "Calculating daily averages from hourly data for 2021_era5_temp\n", - "Saving new daily averages as 2021_era5_temp_daily.nc\n", - "Calculating daily averages from hourly data for 2002_era5_temp\n", - "Saving new daily averages as 2002_era5_temp_daily.nc\n", - "Calculating daily averages from hourly data for 2009_era5_temp\n", - "Saving new daily averages as 2009_era5_temp_daily.nc\n", - "Calculating daily averages from hourly data for 2007_era5_temp\n", - "Saving new daily averages as 2007_era5_temp_daily.nc\n", - "Calculating daily averages from hourly data for 2023_era5_temp\n", - "Saving new daily averages as 2023_era5_temp_daily.nc\n", - "Calculating daily averages from hourly data for 2003_era5_temp\n", - "Saving new daily averages as 2003_era5_temp_daily.nc\n", - "Calculating daily averages from hourly data for 2001_era5_temp\n", - "Saving new daily averages as 2001_era5_temp_daily.nc\n", - "Calculating daily averages from hourly data for 2005_era5_temp\n", - "Saving new daily averages as 2005_era5_temp_daily.nc\n", - "Calculating daily averages from hourly data for 1992_era5_temp\n", - "Saving new daily averages as 1992_era5_temp_daily.nc\n", - "Calculating daily averages from hourly data for 2018_era5_temp\n", - "Saving new daily averages as 2018_era5_temp_daily.nc\n", - "Calculating daily averages from hourly data for 1993_era5_temp\n", - "Saving new daily averages as 1993_era5_temp_daily.nc\n", - "Calculating daily averages from hourly data for 2010_era5_temp\n", - "Saving new daily averages as 2010_era5_temp_daily.nc\n", - "Calculating daily averages from hourly data for 1996_era5_temp\n", - "Saving new daily averages as 1996_era5_temp_daily.nc\n", - "Calculating daily averages from hourly data for 2019_era5_temp\n", - "Saving new daily averages as 2019_era5_temp_daily.nc\n", - "Calculating daily averages from hourly data for 2008_era5_temp\n", - "Saving new daily averages as 2008_era5_temp_daily.nc\n", - "Calculating daily averages from hourly data for 1999_era5_temp\n", - "Saving new daily averages as 1999_era5_temp_daily.nc\n", - "Calculating daily averages from hourly data for 2014_era5_temp\n", - "Saving new daily averages as 2014_era5_temp_daily.nc\n", - "Calculating daily averages from hourly data for 2006_era5_temp\n", - "Saving new daily averages as 2006_era5_temp_daily.nc\n", - "Calculating daily averages from hourly data for 2004_era5_temp\n", - "Saving new daily averages as 2004_era5_temp_daily.nc\n", - "Calculating daily averages from hourly data for 2016_era5_temp\n", - "Saving new daily averages as 2016_era5_temp_daily.nc\n", - "Calculating daily averages from hourly data for 1994_era5_temp\n", - "Saving new daily averages as 1994_era5_temp_daily.nc\n", - "Calculating daily averages from hourly data for 1995_era5_temp\n", - "Saving new daily averages as 1995_era5_temp_daily.nc\n", - "Calculating daily averages from hourly data for 2000_era5_temp\n", - "Saving new daily averages as 2000_era5_temp_daily.nc\n", - "Calculating daily averages from hourly data for 1991_era5_temp\n", - "Saving new daily averages as 1991_era5_temp_daily.nc\n", - "Calculating daily averages from hourly data for 2012_era5_temp\n", - "Saving new daily averages as 2012_era5_temp_daily.nc\n", - "Calculating daily averages from hourly data for 1997_era5_temp\n", - "Saving new daily averages as 1997_era5_temp_daily.nc\n", - "Calculating daily averages from hourly data for 2013_era5_temp\n", - "Saving new 
daily averages as 2013_era5_temp_daily.nc\n", - "Calculating daily averages from hourly data for 2015_era5_temp\n", - "Saving new daily averages as 2015_era5_temp_daily.nc\n", - "Calculating daily averages from hourly data for 1998_era5_temp\n", - "Saving new daily averages as 1998_era5_temp_daily.nc\n", - "Calculating daily averages from hourly data for 2020_era5_temp\n", - "Saving new daily averages as 2020_era5_temp_daily.nc\n", - "Calculating daily averages from hourly data for 2017_era5_temp\n", - "Saving new daily averages as 2017_era5_temp_daily.nc\n", - "Calculating daily averages from hourly data for 2022_era5_temp\n", - "Saving new daily averages as 2022_era5_temp_daily.nc\n" - ] - } - ], - "source": [ - "for file in os.listdir(path_to_files):\n", - " get_name = file.split(\".\")[0]\n", - " new_filename = get_name + \"_daily.nc\"\n", - " data = xr.open_dataset(path_to_files + file) # loads in data\n", - " print(\"Calculating daily averages from hourly data for \" + get_name)\n", - " daily_data = data.resample(time='D').mean() # calculates daily averages\n", - " print(\"Saving new daily averages as \" + new_filename)\n", - " daily_data.to_netcdf(output_path + new_filename)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "2390768b-5ebb-4b7d-b368-96f2eb4b10d7", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "'1990_era5_temp.nc'" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "file = '1990_era5_temp.nc'\n", - "file" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "915e8936-5f7b-482f-b21c-517d56bd877b", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/ihme/homes/nhashmeh/miniconda3/envs/earth-analytics-python/lib/python3.8/site-packages/pyproj/__init__.py:89: UserWarning: pyproj unable to set database path.\n", - " _pyproj_global_context_initialize()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Calculating daily averages from hourly data for 1990_era5_temp\n", - "Saving new daily averages as 1990_era5_temp_daily.nc\n" - ] - } - ], - "source": [ - "get_name = file.split(\".\")[0]\n", - "new_filename = get_name + \"_daily.nc\"\n", - "data = xr.open_dataset(path_to_files + file) # loads in data\n", - "print(\"Calculating daily averages from hourly data for \" + get_name)\n", - "daily_data = data.resample(time='D').mean() # calculates daily averages\n", - "print(\"Saving new daily averages as \" + new_filename)\n", - "daily_data.to_netcdf(output_path + new_filename)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b7b629b7-906f-46c6-8691-1d18ac336513", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "earth-analytics-python", - "language": "python", - "name": "earth-analytics-python" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.17" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/temperature/pre_launch_scripts/era5_data/.ipynb_checkpoints/parallel_hourly_to_daily-checkpoint.ipynb b/temperature/pre_launch_scripts/era5_data/.ipynb_checkpoints/parallel_hourly_to_daily-checkpoint.ipynb deleted file mode 100644 index 7721f38..0000000 --- 
a/temperature/pre_launch_scripts/era5_data/.ipynb_checkpoints/parallel_hourly_to_daily-checkpoint.ipynb +++ /dev/null @@ -1,144 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "3d93f417-607a-4dcd-9c78-0591a7dfb96a", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/ihme/homes/nhashmeh/miniconda3/envs/earth-analytics-python/lib/python3.8/site-packages/pyproj/__init__.py:89: UserWarning: pyproj unable to set database path.\n", - " _pyproj_global_context_initialize()\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "import rasterio as reo\n", - "from rasterio.enums import Resampling\n", - "from rasterio.crs import CRS\n", - "import sklearn\n", - "import geopandas\n", - "import matplotlib.pyplot as plt\n", - "import rioxarray\n", - "import re\n", - "import os\n", - "import xarray as xr\n", - "import datetime as dt\n", - "from datetime import datetime\n", - "import argparse\n", - "import getpass\n", - "import subprocess" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "602f563c-666e-4178-bdd4-d11adbf96c76", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "path_to_files = '/mnt/share/erf/ERA5/era5_precip/downloaded_unzipped/'" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "385177aa-e2b5-42fa-8cfd-52acc2799f34", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Submitted batch job 33596891\n", - "Submitted batch job 33596892\n", - "Submitted batch job 33596893\n", - "Submitted batch job 33596894\n", - "Submitted batch job 33596895\n", - "Submitted batch job 33596896\n", - "Submitted batch job 33596897\n", - "Submitted batch job 33596898\n", - "Submitted batch job 33596899\n", - "Submitted batch job 33596900\n", - "Submitted batch job 33596901\n", - "Submitted batch job 33596902\n", - "Submitted batch job 33596903\n", - "Submitted batch job 33596904\n", - "Submitted batch job 33596905\n", - "Submitted batch job 33596906\n", - "Submitted batch job 33596907\n", - "Submitted batch job 33596908\n", - "Submitted batch job 33596909\n", - "Submitted batch job 33596910\n", - "Submitted batch job 33596911\n", - "Submitted batch job 33596912\n", - "Submitted batch job 33596913\n", - "Submitted batch job 33596914\n", - "Submitted batch job 33596915\n", - "Submitted batch job 33596916\n", - "Submitted batch job 33596917\n", - "Submitted batch job 33596918\n", - "Submitted batch job 33596919\n", - "Submitted batch job 33596920\n", - "Submitted batch job 33596921\n", - "Submitted batch job 33596922\n", - "Submitted batch job 33596923\n", - "Submitted batch job 33596924\n" - ] - } - ], - "source": [ - "for file in os.listdir(path_to_files):\n", - "\n", - " year = file.split('.')[0] # get year\n", - "\n", - " asdf = ['sbatch', '-J', '{}'.format(year), '-e', '/share/temp/sgeoutput/{}/errors/%x.e%j'.format(getpass.getuser()),\n", - " '-o', '/share/temp/sgeoutput/{}/output/%x.o%j'.format(getpass.getuser()), '-A', 'proj_erf',\n", - " '--mem=750G', '-c', '5', '-t', '240', '-C', 'archive', '-p', 'all.q', #'--wait',\n", - " '--wrap', '/ihme/homes/nhashmeh/miniconda3/envs/earth-analytics-python/bin/python ' + '/ihme/homes/nhashmeh/downscale_temperature/era5_data/era5_data_hourly_to_daily.py ' + file]\n", - " subprocess.call(asdf)\n", - " # print(asdf)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "061ee27e-763f-4422-b3d0-e727b2df6132", - 
"metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "earth-analytics-python", - "language": "python", - "name": "earth-analytics-python" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.17" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/temperature/pre_launch_scripts/era5_data/.ipynb_checkpoints/unzip_era5_downloads-checkpoint.ipynb b/temperature/pre_launch_scripts/era5_data/.ipynb_checkpoints/unzip_era5_downloads-checkpoint.ipynb deleted file mode 100644 index 363fcab..0000000 --- a/temperature/pre_launch_scripts/era5_data/.ipynb_checkpoints/unzip_era5_downloads-checkpoint.ipynb +++ /dev/null @@ -1,6 +0,0 @@ -{ - "cells": [], - "metadata": {}, - "nbformat": 4, - "nbformat_minor": 5 -} From e6e252a27b8fcaf31b59f4f1bdac751154c31a06 Mon Sep 17 00:00:00 2001 From: collijk Date: Mon, 13 May 2024 10:44:07 -0700 Subject: [PATCH 4/4] Formatting --- temperature/README.md | 4 +- temperature/daily_preds_nc.py | 54 +++--- .../grid_setup_with_elevation_v3.py | 95 ++++++---- .../grid_setup/grid_setup_with_era5_v3.py | 120 ++++++++----- temperature/merge_era5_stations_script_v3.py | 163 ++++++++++++------ .../climate_zones/readme.txt | 125 +++++++------- .../era5_data/cdsapi_run.py | 98 +++++++---- .../era5_data/era5_data_hourly_to_daily.py | 12 +- temperature/regional_predictions.py | 131 ++++++++++---- 9 files changed, 503 insertions(+), 299 deletions(-) diff --git a/temperature/README.md b/temperature/README.md index d4c54a1..e26dc70 100644 --- a/temperature/README.md +++ b/temperature/README.md @@ -4,5 +4,5 @@ This repo contains the fundamental code used to downscale ERA5 daily average tem 1. Moving the intermediate processing steps to their own worker scripts 2. Converting all outputs to a compressed file format. Ideally, using compression options found in the xarray library when outputting .nc files. -3. Creating an additional script that addresses problematic outliers in the prediction step. Ideally, this is done within the regional_predictions.py script or directly after it. -4. Include diagnostic scripts used to visualize prediction data. \ No newline at end of file +3. Creating an additional script that addresses problematic outliers in the prediction step. Ideally, this is done within the regional_predictions.py script or directly after it. +4. Include diagnostic scripts used to visualize prediction data. 
diff --git a/temperature/daily_preds_nc.py b/temperature/daily_preds_nc.py index 79aefaf..f09069f 100644 --- a/temperature/daily_preds_nc.py +++ b/temperature/daily_preds_nc.py @@ -22,7 +22,7 @@ final_data = pd.DataFrame() for file in os.listdir(folder): - if file.endswith('.feather'): + if file.endswith(".feather"): print(file) # load the data print("Loading data") @@ -30,15 +30,15 @@ # drop index column print("Dropping unnecessary columns") # data = data.drop(columns='index') - data = data[['latitude_st', 'longitude_st', 'day_of_year', 'predictions']] + data = data[["latitude_st", "longitude_st", "day_of_year", "predictions"]] # filter to get day print("Filtering to get day: ", day) - data_day = data[data['day_of_year'] == day] + data_day = data[data["day_of_year"] == day] # round lat/lon for storage print("Rounding lat/lon") - data_day['latitude_st'] = data_day['latitude_st'].round(10) - data_day['longitude_st'] = data_day['longitude_st'].round(10) - data_day['predictions'] = data_day['predictions'].round(3) + data_day["latitude_st"] = data_day["latitude_st"].round(10) + data_day["longitude_st"] = data_day["longitude_st"].round(10) + data_day["predictions"] = data_day["predictions"].round(3) print("Appending data") # Append the data for the current day to the larger DataFrame final_data = pd.concat([final_data, data_day]) @@ -53,26 +53,26 @@ date = date + timedelta(days=day_of_year) # Format the datetime object into a string -date_string = date.strftime('%Y_%m_%d') +date_string = date.strftime("%Y_%m_%d") print("Here is the date string: ", date_string) # replace old time columns with date -final_data.rename(columns={'latitude_st': 'lat', 'longitude_st': 'lon'}, inplace=True) -final_data = final_data[['lat', 'lon', 'predictions']] -final_data['date'] = date_string +final_data.rename(columns={"latitude_st": "lat", "longitude_st": "lon"}, inplace=True) +final_data = final_data[["lat", "lon", "predictions"]] +final_data["date"] = date_string -filename = 'predictions_' + date_string +filename = "predictions_" + date_string -print('resetting index') -final_data.set_index(['date', 'lat', 'lon'], inplace=True) +print("resetting index") +final_data.set_index(["date", "lat", "lon"], inplace=True) # convert the dataframe to an xarray Dataset -print('converting to xarray Dataset') +print("converting to xarray Dataset") final_data = xr.Dataset.from_dataframe(final_data) # Calculate the scale factor and add offset -min_value = final_data['predictions'].min(dim=('date', 'lat', 'lon')).item() -max_value = final_data['predictions'].max(dim=('date', 'lat', 'lon')).item() +min_value = final_data["predictions"].min(dim=("date", "lat", "lon")).item() +max_value = final_data["predictions"].max(dim=("date", "lat", "lon")).item() scale_factor = (max_value - min_value) / (2**16 - 1) add_offset = min_value @@ -82,13 +82,15 @@ print("saving file to: ", output_path + filename) # save the Dataset to a netCDF file -final_data.to_netcdf(output_path + filename + '.nc', - encoding={ - 'predictions': { - 'dtype': 'float32', - 'scale_factor': scale_factor, - 'add_offset': add_offset, - '_FillValue': -9999, - 'zlib': True - } - }) \ No newline at end of file +final_data.to_netcdf( + output_path + filename + ".nc", + encoding={ + "predictions": { + "dtype": "float32", + "scale_factor": scale_factor, + "add_offset": add_offset, + "_FillValue": -9999, + "zlib": True, + } + }, +) diff --git a/temperature/grid_setup/grid_setup_with_elevation_v3.py b/temperature/grid_setup/grid_setup_with_elevation_v3.py index ebcf28e..d030670 
100644 --- a/temperature/grid_setup/grid_setup_with_elevation_v3.py +++ b/temperature/grid_setup/grid_setup_with_elevation_v3.py @@ -23,7 +23,7 @@ from sklearn.preprocessing import StandardScaler from sklearn.linear_model import LinearRegression from sklearn.metrics import mean_absolute_error -from sklearn.model_selection import cross_val_score +from sklearn.model_selection import cross_val_score from sklearn.model_selection import GridSearchCV from sklearn.model_selection import RandomizedSearchCV from joblib import load @@ -45,25 +45,35 @@ import subprocess import pyproj -pyproj.datadir.set_data_dir('/ihme/homes/nhashmeh/miniconda3/envs/earth-analytics-python/share/proj') -os.environ['PROJ_LIB'] = '/ihme/homes/nhashmeh/miniconda3/envs/earth-analytics-python/share/proj' + +pyproj.datadir.set_data_dir( + "/ihme/homes/nhashmeh/miniconda3/envs/earth-analytics-python/share/proj" +) +os.environ["PROJ_LIB"] = ( + "/ihme/homes/nhashmeh/miniconda3/envs/earth-analytics-python/share/proj" +) # Parse arguments parser = argparse.ArgumentParser() -parser.add_argument('file', type=str, help='filename') +parser.add_argument("file", type=str, help="filename") args = parser.parse_args() -chunks_path = "/mnt/share/erf/ERA5/merged_era5_with_stations/grid_setup/chunks_by_latlon_v3/" +chunks_path = ( + "/mnt/share/erf/ERA5/merged_era5_with_stations/grid_setup/chunks_by_latlon_v3/" +) chunk_file = args.file grid_df = pd.read_feather(chunks_path + chunk_file) -grid_df.drop(columns=['latitude_right', 'longitude_right'], inplace=True) # this should have been dropped earlier... +grid_df.drop( + columns=["latitude_right", "longitude_right"], inplace=True +) # this should have been dropped earlier... + +grid_df["geometry"] = grid_df["geometry"].apply(wkt.loads) +grid_gdf = gpd.GeoDataFrame(grid_df, geometry="geometry") -grid_df['geometry'] = grid_df['geometry'].apply(wkt.loads) -grid_gdf = gpd.GeoDataFrame(grid_df, geometry='geometry') # Function to adjust longitude def adjust_longitude(geometry): @@ -72,15 +82,15 @@ def adjust_longitude(geometry): else: return geometry + gdf_merge = grid_gdf -gdf_merge['elevation'] = np.nan +gdf_merge["elevation"] = np.nan merge_counter = 0 success_counter = 0 -for file in os.listdir('/mnt/share/erf/SRTM_GL1_srtm/'): - - file_ext = file.split('.')[-1] +for file in os.listdir("/mnt/share/erf/SRTM_GL1_srtm/"): + file_ext = file.split(".")[-1] if file_ext != "tif": continue @@ -93,18 +103,24 @@ def adjust_longitude(geometry): if file_ext != "tif": continue - if NS == 'S': - NS_num= float(NS_num) * -1 + if NS == "S": + NS_num = float(NS_num) * -1 - if EW == 'W': + if EW == "W": EW_num = 360 - float(EW_num) - if (((EW_num < math.floor(gdf_merge['longitude_left'].min())) | (EW_num > math.ceil(gdf_merge['longitude_left'].max()))) | ((NS_num < math.floor(gdf_merge['latitude_left'].min())) | (NS_num > math.ceil(gdf_merge['latitude_left'].max())))): + if ( + (EW_num < math.floor(gdf_merge["longitude_left"].min())) + | (EW_num > math.ceil(gdf_merge["longitude_left"].max())) + ) | ( + (NS_num < math.floor(gdf_merge["latitude_left"].min())) + | (NS_num > math.ceil(gdf_merge["latitude_left"].max())) + ): merge_counter += 1 continue # rescaling 30 meters to 1 km - downscale_factor = 30/1000 + downscale_factor = 30 / 1000 # Load the raster with reo.open("/mnt/share/erf/SRTM_GL1_srtm/" + file) as src: @@ -112,40 +128,47 @@ def adjust_longitude(geometry): data = src.read( out_shape=( src.count, - int(src.height * downscale_factor), # multiply height by downscale factor - int(src.width * 
downscale_factor) # multiply widrh by downscale factor
+            int(
+                src.height * downscale_factor
+            ),  # multiply height by downscale factor
+            int(src.width * downscale_factor),  # multiply width by downscale factor
         ),
-        resampling=Resampling.bilinear # resampling method
+        resampling=Resampling.bilinear,  # resampling method
     )

     # scale image transform
     transform = src.transform * src.transform.scale(
-        (src.width / data.shape[-1]),
-        (src.height / data.shape[-2])
+        (src.width / data.shape[-1]), (src.height / data.shape[-2])
     )

-    # convert the resampled raster to a geodataframe
+    # convert the resampled raster to a geodataframe
     results = (
-    {'properties': {'raster_val': v}, 'geometry': s}
-    for i, (s, v)
-    in enumerate(
-        reo.features.shapes(data[0], transform=transform))) # assumes a single band image
+        {"properties": {"raster_val": v}, "geometry": s}
+        for i, (s, v) in enumerate(
+            reo.features.shapes(data[0], transform=transform)
+        )
+    )  # assumes a single band image

     geodf = gpd.GeoDataFrame.from_features(list(results))

     # Apply the function to the geometry column
-    geodf['geometry'] = geodf['geometry'].apply(adjust_longitude)
+    geodf["geometry"] = geodf["geometry"].apply(adjust_longitude)

-    prev_len = len(gdf_merge[~gdf_merge['elevation'].isna()]) # used for debugging
+    prev_len = len(gdf_merge[~gdf_merge["elevation"].isna()])  # used for debugging

-    gdf_merge = gpd.sjoin(gdf_merge, geodf, how='left', predicate='intersects')
+    gdf_merge = gpd.sjoin(gdf_merge, geodf, how="left", predicate="intersects")
     # test_merge.rename(columns = {'raster_val' : 'elevation'}, inplace=True)
-    gdf_merge['elevation'] = gdf_merge['elevation'].combine_first(gdf_merge['raster_val'])
-    gdf_merge.drop(columns = 'index_right', inplace=True)
-    gdf_merge.drop(columns = 'raster_val', inplace=True)
-
+    gdf_merge["elevation"] = gdf_merge["elevation"].combine_first(
+        gdf_merge["raster_val"]
+    )
+    gdf_merge.drop(columns="index_right", inplace=True)
+    gdf_merge.drop(columns="raster_val", inplace=True)

-    if len(gdf_merge[~gdf_merge['elevation'].isna()]) > prev_len:
+    if len(gdf_merge[~gdf_merge["elevation"].isna()]) > prev_len:
         success_counter += 1

-gdf_merge.to_feather("/mnt/share/erf/ERA5/merged_era5_with_stations/grid_setup/chunks_with_elevation_v3/" + chunk_file, index=False)
\ No newline at end of file
+gdf_merge.to_feather(
+    "/mnt/share/erf/ERA5/merged_era5_with_stations/grid_setup/chunks_with_elevation_v3/"
+    + chunk_file,
+    index=False,
+)
diff --git a/temperature/grid_setup/grid_setup_with_era5_v3.py b/temperature/grid_setup/grid_setup_with_era5_v3.py
index e55001e..2c3469f 100644
--- a/temperature/grid_setup/grid_setup_with_era5_v3.py
+++ b/temperature/grid_setup/grid_setup_with_era5_v3.py
@@ -23,7 +23,7 @@
 from sklearn.preprocessing import StandardScaler
 from sklearn.linear_model import LinearRegression
 from sklearn.metrics import mean_absolute_error
-from sklearn.model_selection import cross_val_score
+from sklearn.model_selection import cross_val_score
 from sklearn.model_selection import GridSearchCV
 from sklearn.model_selection import RandomizedSearchCV
 from joblib import load
@@ -45,24 +45,31 @@
 import subprocess
 import pyproj

-pyproj.datadir.set_data_dir('/ihme/homes/nhashmeh/miniconda3/envs/earth-analytics-python/share/proj')
-os.environ['PROJ_LIB'] = '/ihme/homes/nhashmeh/miniconda3/envs/earth-analytics-python/share/proj'
+
+pyproj.datadir.set_data_dir(
+    "/ihme/homes/nhashmeh/miniconda3/envs/earth-analytics-python/share/proj"
+)
+os.environ["PROJ_LIB"] = (
+    "/ihme/homes/nhashmeh/miniconda3/envs/earth-analytics-python/share/proj"
+)

 # Parse arguments
 parser = argparse.ArgumentParser()
-parser.add_argument('file', type=str, help='filename')
-parser.add_argument('era5_file', type=str, help='filename')
+parser.add_argument("file", type=str, help="filename")
+parser.add_argument("era5_file", type=str, help="filename")
 args = parser.parse_args()

-chunks_path = "/mnt/share/erf/ERA5/merged_era5_with_stations/grid_setup/chunks_with_elevation_v3/"
+chunks_path = (
+    "/mnt/share/erf/ERA5/merged_era5_with_stations/grid_setup/chunks_with_elevation_v3/"
+)
 era5_path = "/mnt/share/erf/ERA5/three_hourly_temp_9km/daily_temps/"
 output_path = "/mnt/share/erf/ERA5/merged_era5_with_stations/grid_setup/chunks_with_elevation_and_era5_v3/"

 chunk_file = args.file
 era5_file = args.era5_file

-chunk_file_name = chunk_file.split('.')[0]
+chunk_file_name = chunk_file.split(".")[0]

 print(chunk_file_name)

@@ -71,13 +78,15 @@
 # grid_df['geometry'] = grid_df['geometry'].apply(wkt.loads)
 # grid_gdf = gpd.GeoDataFrame(grid_df, geometry='geometry')

-geometry = [Point(xy) for xy in zip(grid_df['longitude_left'], grid_df['latitude_left'])]
+geometry = [
+    Point(xy) for xy in zip(grid_df["longitude_left"], grid_df["latitude_left"])
+]
 grid_gdf = gpd.GeoDataFrame(grid_df, geometry=geometry)

-grid_gdf = grid_gdf[grid_gdf['lcz_value'] != 0.0]
+grid_gdf = grid_gdf[grid_gdf["lcz_value"] != 0.0]
 values_to_remove = [-999.0, -999.9] # replace with your actual values
-grid_gdf = grid_gdf[~grid_gdf['elevation'].isin(values_to_remove)]
-grid_gdf = grid_gdf[~grid_gdf['elevation'].isnull()]
+grid_gdf = grid_gdf[~grid_gdf["elevation"].isin(values_to_remove)]
+grid_gdf = grid_gdf[~grid_gdf["elevation"].isnull()]

 # if len(grid_gdf) < 1:
 #     print("Reason for stop: The grid filtered out rows where LCZ = 0 and elevation = null. If no rows are left after this operation, there is nothing left to merge.")

@@ -85,16 +94,17 @@
 print(era5_file)

-year = era5_file.split('_')[0]
+year = era5_file.split("_")[0]

 # Start with an empty GeoDataFrame
 all_data = []

-for i in range(1,13):
-
-    if (year == 2023) & (i > 8): # ERA5 data only available up to a certain date. No need to keep going.
+for i in range(1, 13):
+    if int(year) == 2023 and (
+        i > 8
+    ):  # ERA5 data only available up to a certain date. No need to keep going.
continue - + print("Month: " + str(i)) era5_data = xr.open_dataset(era5_path + era5_file) @@ -102,32 +112,48 @@ now = datetime.now() current_time = now.strftime("%I:%M:%S %p") print("Prepping era5 data at", current_time) - era5_data_month = era5_data.t2m.sel(time=era5_data['time.month'] == i) # subset by month - era5_data_month_df = era5_data_month.to_dataframe().reset_index().dropna() # convert to dataframe, reset indices, drop nan - era5_data_month_df['t2m'] = era5_data_month_df['t2m'] - 273.15 # convert to C + era5_data_month = era5_data.t2m.sel( + time=era5_data["time.month"] == i + ) # subset by month + era5_data_month_df = ( + era5_data_month.to_dataframe().reset_index().dropna() + ) # convert to dataframe, reset indices, drop nan + era5_data_month_df["t2m"] = era5_data_month_df["t2m"] - 273.15 # convert to C # era5_data_month_df['longitude'] = era5_data_month_df['longitude'].apply(lambda x: x-360 if x > 180 else x) # convert 0:360 to -180:180 now = datetime.now() current_time = now.strftime("%I:%M:%S %p") print("Converting era5 data to GeoDataFrame at", current_time) - gdf_era5 = gpd.GeoDataFrame(era5_data_month_df, geometry=gpd.points_from_xy(era5_data_month_df.longitude, era5_data_month_df.latitude)) + gdf_era5 = gpd.GeoDataFrame( + era5_data_month_df, + geometry=gpd.points_from_xy( + era5_data_month_df.longitude, era5_data_month_df.latitude + ), + ) now = datetime.now() merge1_time = now.strftime("%I:%M:%S %p") print("Spatial joining era5 data at time", merge1_time) - gdf_merged_era5 = gpd.sjoin_nearest(grid_gdf, gdf_era5, how="left", distance_col="distances_era5", lsuffix='grid', rsuffix='era5') - gdf_merged_era5.drop(columns=['index_era5'], inplace=True) - - gdf_merged_era5.dropna(inplace=True) # get rid of rows with NaN. + gdf_merged_era5 = gpd.sjoin_nearest( + grid_gdf, + gdf_era5, + how="left", + distance_col="distances_era5", + lsuffix="grid", + rsuffix="era5", + ) + gdf_merged_era5.drop(columns=["index_era5"], inplace=True) + + gdf_merged_era5.dropna(inplace=True) # get rid of rows with NaN. 
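# An illustrative aside on the join above (hypothetical coordinates and values,
# not part of this patch): gpd.sjoin_nearest attaches to each grid point the
# attributes of the nearest ERA5 pixel centre, and "distances_era5" records
# that separation in degrees, since both frames use a geographic CRS. Assuming
# geopandas >= 0.10:
#
#     import geopandas as gpd
#
#     grid = gpd.GeoDataFrame(geometry=gpd.points_from_xy([0.02], [0.0]))
#     era5 = gpd.GeoDataFrame(
#         {"t2m": [19.5]}, geometry=gpd.points_from_xy([0.0], [0.0])
#     )
#     joined = gpd.sjoin_nearest(grid, era5, how="left", distance_col="d")
#     # joined["t2m"] -> 19.5, joined["d"] -> 0.02 (degrees, not metres)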
# get rid of data that have not yet been "validated" now = datetime.now() export_time = now.strftime("%I:%M:%S %p") print("Checking if ERA5T data exists", export_time) - if 'expver' in gdf_merged_era5.columns: + if "expver" in gdf_merged_era5.columns: print("Removing ERA5T data") gdf_merged_era5 = gdf_merged_era5[gdf_merged_era5.expver != 5.0] - gdf_merged_era5.drop(columns = ['expver'], inplace=True) + gdf_merged_era5.drop(columns=["expver"], inplace=True) else: print("All included data is considered validated (not ERA5T)") @@ -137,30 +163,38 @@ print("More cleaning/processing...", export_time) # Convert 'time' column to datetime format - gdf_merged_era5['time'] = pd.to_datetime(gdf_merged_era5['time']) - gdf_merged_era5['month'] = gdf_merged_era5['time'].dt.month - gdf_merged_era5['year'] = gdf_merged_era5['time'].dt.year - gdf_merged_era5['day_of_year'] = gdf_merged_era5['time'].dt.dayofyear - + gdf_merged_era5["time"] = pd.to_datetime(gdf_merged_era5["time"]) + gdf_merged_era5["month"] = gdf_merged_era5["time"].dt.month + gdf_merged_era5["year"] = gdf_merged_era5["time"].dt.year + gdf_merged_era5["day_of_year"] = gdf_merged_era5["time"].dt.dayofyear + # gdf_merged_era5 = gdf_merged_era5[gdf_merged_era5['lcz_value'] != 0.0] # values_to_remove = [-999.0, -999.9] # replace with your actual values # gdf_merged_era5 = gdf_merged_era5[~gdf_merged_era5['elevation'].isin(values_to_remove)] # gdf_merged_era5 = gdf_merged_era5[~gdf_merged_era5['elevation'].isnull()] # drop columns not used - gdf_merged_era5.drop(columns=['time', 'distances_era5', 'latitude', 'longitude', 'geometry'], inplace=True) - + gdf_merged_era5.drop( + columns=["time", "distances_era5", "latitude", "longitude", "geometry"], + inplace=True, + ) + # rename columns to training data - gdf_merged_era5.rename(columns = {'latitude_left':'latitude_st', 'longitude_left':'longitude_st'}, inplace=True) - + gdf_merged_era5.rename( + columns={"latitude_left": "latitude_st", "longitude_left": "longitude_st"}, + inplace=True, + ) + # Make sure these columns are integers... 
- gdf_merged_era5['month'] = gdf_merged_era5['month'].astype(int) - gdf_merged_era5['year'] = gdf_merged_era5['year'].astype(int) - gdf_merged_era5['day_of_year'] = gdf_merged_era5['day_of_year'].astype(int) - + gdf_merged_era5["month"] = gdf_merged_era5["month"].astype(int) + gdf_merged_era5["year"] = gdf_merged_era5["year"].astype(int) + gdf_merged_era5["day_of_year"] = gdf_merged_era5["day_of_year"].astype(int) + # add total_days_in_year to identify leap years - gdf_merged_era5['total_days_in_year'] = pd.to_datetime(gdf_merged_era5['year'], format='%Y').dt.is_leap_year + 365 - + gdf_merged_era5["total_days_in_year"] = ( + pd.to_datetime(gdf_merged_era5["year"], format="%Y").dt.is_leap_year + 365 + ) + print("Length of merged gdf: " + str(len(gdf_merged_era5))) now = datetime.now() @@ -178,11 +212,11 @@ if not all_data.empty: now = datetime.now() export_time = now.strftime("%I:%M:%S %p") - print("Outputting to file at time", export_time) + print("Outputting to file at time", export_time) all_data.to_feather(output_path + chunk_file_name + "_" + year + ".feather") now = datetime.now() export_time = now.strftime("%I:%M:%S %p") print("File saved!", export_time) else: - print("The DataFrame is empty, file not saved.") \ No newline at end of file + print("The DataFrame is empty, file not saved.") diff --git a/temperature/merge_era5_stations_script_v3.py b/temperature/merge_era5_stations_script_v3.py index 973e719..bc2b0f9 100644 --- a/temperature/merge_era5_stations_script_v3.py +++ b/temperature/merge_era5_stations_script_v3.py @@ -19,53 +19,56 @@ # Parse arguments parser = argparse.ArgumentParser() -parser.add_argument('file', type=str, help='filename') +parser.add_argument("file", type=str, help="filename") args = parser.parse_args() file = args.file -era5_path = '/mnt/share/erf/ERA5/three_hourly_temp_9km/daily_temps/' -stations_path = '/ihme/homes/nhashmeh/downscale_temperature/global_summaries/' -lcz_filepath = "/ihme/homes/nhashmeh/downscale_temperature/climate_zones/lcz_filter_v2.tif" -output_path = '/mnt/share/erf/ERA5/merged_era5_with_stations/' +era5_path = "/mnt/share/erf/ERA5/three_hourly_temp_9km/daily_temps/" +stations_path = "/ihme/homes/nhashmeh/downscale_temperature/global_summaries/" +lcz_filepath = ( + "/ihme/homes/nhashmeh/downscale_temperature/climate_zones/lcz_filter_v2.tif" +) +output_path = "/mnt/share/erf/ERA5/merged_era5_with_stations/" now = datetime.now() current_time = now.strftime("%I:%M:%S %p") print("Loop start time =", current_time) with reo.open(lcz_filepath) as src: - - year = file.split('_')[0] # get year + year = file.split("_")[0] # get year # load in era5 data now = datetime.now() current_time = now.strftime("%I:%M:%S %p") print("Loading in era5 data at", current_time) - era5_data = xr.open_dataset(era5_path + year + '_era5_temp_daily.nc') + era5_data = xr.open_dataset(era5_path + year + "_era5_temp_daily.nc") # load in station data now = datetime.now() current_time = now.strftime("%I:%M:%S %p") print("Loading in and processing station data at", current_time) - stations_filename = year + '_all_data.csv' - station_data = pd.read_csv(stations_path + stations_filename) + stations_filename = year + "_all_data.csv" + station_data = pd.read_csv(stations_path + stations_filename) # processing station data... 
station_data.columns = station_data.columns.str.lower() - station_data.rename(columns={'date': 'time'}, inplace = True) - station_data['time'] = pd.to_datetime(station_data['time']) - station_data = station_data.dropna(how = 'any', subset = ['latitude', 'longitude', 'temp', 'elevation']) # drop rows where there are no coords (data isn't always clean...) - station_data['temp'] = (station_data['temp'] - 32) * 5/9 # convert to C - station_data.drop_duplicates(inplace=True) # these apparently exist... - station_data.drop(columns="station", inplace=True) # don't need station numbers... + station_data.rename(columns={"date": "time"}, inplace=True) + station_data["time"] = pd.to_datetime(station_data["time"]) + station_data = station_data.dropna( + how="any", subset=["latitude", "longitude", "temp", "elevation"] + ) # drop rows where there are no coords (data isn't always clean...) + station_data["temp"] = (station_data["temp"] - 32) * 5 / 9 # convert to C + station_data.drop_duplicates(inplace=True) # these apparently exist... + station_data.drop(columns="station", inplace=True) # don't need station numbers... - grouped_stations = station_data.groupby(station_data['time'].dt.month) + grouped_stations = station_data.groupby(station_data["time"].dt.month) now = datetime.now() current_time = now.strftime("%I:%M:%S %p") print("Looping through months at", current_time) - for i in range(1,13): # loop through each month + for i in range(1, 13): # loop through each month print("Month: " + str(i)) group_i = grouped_stations.get_group(i) @@ -73,7 +76,9 @@ now = datetime.now() current_time = now.strftime("%I:%M:%S %p") print("Converting climate stations to GeoDataFrame at time", current_time) - gdf_station = gpd.GeoDataFrame(group_i, geometry=gpd.points_from_xy(group_i.longitude, group_i.latitude)) + gdf_station = gpd.GeoDataFrame( + group_i, geometry=gpd.points_from_xy(group_i.longitude, group_i.latitude) + ) # Spatial join now = datetime.now() @@ -83,54 +88,88 @@ coords = np.transpose((gdf_station.geometry.x, gdf_station.geometry.y)) # Transpose your coordinate pairs, sample from the raster, and get raster coordinates - raster_values_and_coords = [(value, src.xy(*src.index(*coord))) for coord, value in zip(coords, src.sample(coords))] + raster_values_and_coords = [ + (value, src.xy(*src.index(*coord))) + for coord, value in zip(coords, src.sample(coords)) + ] # Separate values and coordinates raster_values = [value[0] for value in raster_values_and_coords] raster_coords = [coord for value, coord in raster_values_and_coords] # Add the raster values and coordinates to your dataframe - gdf_station['band_1'] = raster_values - gdf_station['band_1'] = gdf_station['band_1'].apply(lambda x: x[0]) - gdf_station['lcz_coords'] = raster_coords - gdf_station[['lcz_longitude', 'lcz_latitude']] = gdf_station['lcz_coords'].apply(pd.Series) - gdf_station.drop(columns='lcz_coords', inplace=True) + gdf_station["band_1"] = raster_values + gdf_station["band_1"] = gdf_station["band_1"].apply(lambda x: x[0]) + gdf_station["lcz_coords"] = raster_coords + gdf_station[["lcz_longitude", "lcz_latitude"]] = gdf_station[ + "lcz_coords" + ].apply(pd.Series) + gdf_station.drop(columns="lcz_coords", inplace=True) # Calculate distance between station and LCZ points - st_points = gpd.GeoSeries(gdf_station.apply(lambda row: Point(row['longitude'], row['latitude']), axis=1)) - lcz_points = gpd.GeoSeries(gdf_station.apply(lambda row: Point(row['lcz_longitude'], row['lcz_latitude']), axis=1)) - - gdf_station['distances_lcz'] = 
st_points.distance(lcz_points) + st_points = gpd.GeoSeries( + gdf_station.apply( + lambda row: Point(row["longitude"], row["latitude"]), axis=1 + ) + ) + lcz_points = gpd.GeoSeries( + gdf_station.apply( + lambda row: Point(row["lcz_longitude"], row["lcz_latitude"]), axis=1 + ) + ) + + gdf_station["distances_lcz"] = st_points.distance(lcz_points) # Prep era5 data now = datetime.now() current_time = now.strftime("%I:%M:%S %p") print("Prepping era5 data at", current_time) - era5_data_month = era5_data.t2m.sel(time=era5_data['time.month'] == i) # subset by month - era5_data_month_df = era5_data_month.to_dataframe().reset_index().dropna() # convert to dataframe, reset indices, drop nan - era5_data_month_df['t2m'] = era5_data_month_df['t2m'] - 273.15 # convert to C - era5_data_month_df['longitude'] = era5_data_month_df['longitude'].apply(lambda x: x-360 if x > 180 else x) # convert 0:360 to -180:180 + era5_data_month = era5_data.t2m.sel( + time=era5_data["time.month"] == i + ) # subset by month + era5_data_month_df = ( + era5_data_month.to_dataframe().reset_index().dropna() + ) # convert to dataframe, reset indices, drop nan + era5_data_month_df["t2m"] = era5_data_month_df["t2m"] - 273.15 # convert to C + era5_data_month_df["longitude"] = era5_data_month_df["longitude"].apply( + lambda x: x - 360 if x > 180 else x + ) # convert 0:360 to -180:180 now = datetime.now() current_time = now.strftime("%I:%M:%S %p") print("Converting era5 data to GeoDataFrame at", current_time) - gdf_era5 = gpd.GeoDataFrame(era5_data_month_df, geometry=gpd.points_from_xy(era5_data_month_df.longitude, era5_data_month_df.latitude)) + gdf_era5 = gpd.GeoDataFrame( + era5_data_month_df, + geometry=gpd.points_from_xy( + era5_data_month_df.longitude, era5_data_month_df.latitude + ), + ) # merge era5 and stations dataframes now = datetime.now() merge1_time = now.strftime("%I:%M:%S %p") print("Spatial joining era5 data at time", merge1_time) # Currently setting max distance for join to 3 (distance in degrees between lat/lon points) - gdf_merged_final = gpd.sjoin_nearest(gdf_station, gdf_era5, how="left", distance_col="distances_era5", lsuffix='st', rsuffix='era5', max_distance = 3) + gdf_merged_final = gpd.sjoin_nearest( + gdf_station, + gdf_era5, + how="left", + distance_col="distances_era5", + lsuffix="st", + rsuffix="era5", + max_distance=3, + ) now = datetime.now() current_time = now.strftime("%I:%M:%S %p") - print("Cleaning up final dataframe...", current_time) + print("Cleaning up final dataframe...", current_time) # Drop unneeded columns that came from the era5 dataframe - gdf_merged_final.drop(columns=['index_era5'], inplace=True) + gdf_merged_final.drop(columns=["index_era5"], inplace=True) # get time matching rows because spatial join doesn't include the time aspect.... - gdf_merged_final = gdf_merged_final.loc[gdf_merged_final["time_st"]==gdf_merged_final["time_era5"]] + gdf_merged_final = gdf_merged_final.loc[ + gdf_merged_final["time_st"] == gdf_merged_final["time_era5"] + ] # There are cases where multiple stations exist within a single era5 pixel. These often have very similar elevations and temperatures, # sometimes even the same elevation but different temps (or the other way around). Currently, I am keeping these in order to have the model @@ -141,16 +180,20 @@ # To deal with this, we take the row with the minimum distance between the pixel and the station to represent the era5 temp for # that station. 
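# An illustrative aside on the dedup step below (toy values, not from this
# patch): groupby(...)["distances_era5"].idxmin() returns, for each
# (latitude_st, longitude_st, time_st) group, the index label of the row with
# the smallest distance, so .loc[...] keeps exactly one ERA5 match per
# station-day:
#
#     import pandas as pd
#
#     df = pd.DataFrame(
#         {"station": ["a", "a", "b"], "dist": [0.3, 0.1, 0.2], "t2m": [10, 11, 12]}
#     )
#     keep = df.loc[df.groupby("station")["dist"].idxmin()]
#     # keep -> the ("a", 0.1, 11) and ("b", 0.2, 12) rows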
- gdf_merged_final = gdf_merged_final.loc[gdf_merged_final.groupby(['latitude_st', 'longitude_st', 'time_st'])['distances_era5'].idxmin()] + gdf_merged_final = gdf_merged_final.loc[ + gdf_merged_final.groupby(["latitude_st", "longitude_st", "time_st"])[ + "distances_era5" + ].idxmin() + ] # get rid of data that have not yet been "validated" now = datetime.now() export_time = now.strftime("%I:%M:%S %p") print("Checking if ERA5T data exists", export_time) - if 'expver' in gdf_merged_final.columns: + if "expver" in gdf_merged_final.columns: print("Removing ERA5T data") gdf_merged_final = gdf_merged_final[gdf_merged_final.expver != 5.0] - gdf_merged_final.drop(columns = ['expver'], inplace=True) + gdf_merged_final.drop(columns=["expver"], inplace=True) else: print("All included data is considered validated (not ERA5T)") @@ -160,23 +203,41 @@ print("More cleaning/processing...", export_time) # Convert 'time' column to datetime format - gdf_merged_final['time'] = pd.to_datetime(gdf_merged_final['time_st']) - gdf_merged_final['month'] = gdf_merged_final['time'].dt.month - gdf_merged_final['year'] = gdf_merged_final['time'].dt.year - gdf_merged_final['day_of_year'] = gdf_merged_final['time'].dt.dayofyear + gdf_merged_final["time"] = pd.to_datetime(gdf_merged_final["time_st"]) + gdf_merged_final["month"] = gdf_merged_final["time"].dt.month + gdf_merged_final["year"] = gdf_merged_final["time"].dt.year + gdf_merged_final["day_of_year"] = gdf_merged_final["time"].dt.dayofyear - # Create 'total_days_in_year' feature - gdf_merged_final['total_days_in_year'] = gdf_merged_final['time'].dt.is_leap_year + 365 + # Create 'total_days_in_year' feature + gdf_merged_final["total_days_in_year"] = ( + gdf_merged_final["time"].dt.is_leap_year + 365 + ) # drop columns not used - gdf_merged_final.drop(columns=['time', 'time_st', 'geometry', 'lcz_longitude', 'lcz_latitude', 'distances_lcz', - 'time_era5', 'latitude_era5', 'longitude_era5', 'distances_era5',], inplace=True) + gdf_merged_final.drop( + columns=[ + "time", + "time_st", + "geometry", + "lcz_longitude", + "lcz_latitude", + "distances_lcz", + "time_era5", + "latitude_era5", + "longitude_era5", + "distances_era5", + ], + inplace=True, + ) now = datetime.now() export_time = now.strftime("%I:%M:%S %p") print("Exporting to .feather at time", export_time) - gdf_merged_final.to_feather(output_path + year + '_' + str(i) + '_era5_stations_lcz_merged.feather', index=False) + gdf_merged_final.to_feather( + output_path + year + "_" + str(i) + "_era5_stations_lcz_merged.feather", + index=False, + ) now = datetime.now() export_time = now.strftime("%I:%M:%S %p") - print("File exported at time", export_time) \ No newline at end of file + print("File exported at time", export_time) diff --git a/temperature/pre_launch_scripts/climate_zones/readme.txt b/temperature/pre_launch_scripts/climate_zones/readme.txt index f31c4c2..b344478 100644 --- a/temperature/pre_launch_scripts/climate_zones/readme.txt +++ b/temperature/pre_launch_scripts/climate_zones/readme.txt @@ -12,26 +12,26 @@ Principal Authors Information: Principal Investigator: Matthias Demuzere (matthias.demuzere ~at~ rub.de) Alternate Contact: Benjamin Bechtel (benjamin.bechtel ~at~ rub.de) -Summary: - A global 100 m spatial resolution Local Climate Zone (LCZ) map, derived from multiple earth - observation datasets and expert LCZ class labels. 
- - The LCZ map is based on the LCZ typology (Stewart and Oke, 2012) that distinguish - urban surfaces accounting for their typical combination of micro-scale land-covers - and associated physical properties. The LCZ scheme is distinguished from other land use - / land cover schemes by its focus on urban and rural landscape types, which can be - described by any of the 17 classes in the LCZ scheme. - - Out of the 17 LCZ classes, 10 reflect the 'built' environment, and each LCZ type is - associated with generic numerical descriptions of key urban canopy parameters critical - to model atmospheric responses to urbanisation. In addition, since LCZs were originally - designed as a new framework for urban heat island studies (Stewart and Oke, 2012), they - also contain a limited set (7) of 'natural' land-cover classes that can be used as 'control' - or 'natural reference' areas. As these seven natural classes in the LCZ scheme can not - capture the heterogeneity of the world’s existing natural ecosystems, we advise - users - if required - to combine the built LCZ classes with any other land-cover product +Summary: + A global 100 m spatial resolution Local Climate Zone (LCZ) map, derived from multiple earth + observation datasets and expert LCZ class labels. + + The LCZ map is based on the LCZ typology (Stewart and Oke, 2012) that distinguish + urban surfaces accounting for their typical combination of micro-scale land-covers + and associated physical properties. The LCZ scheme is distinguished from other land use + / land cover schemes by its focus on urban and rural landscape types, which can be + described by any of the 17 classes in the LCZ scheme. + + Out of the 17 LCZ classes, 10 reflect the 'built' environment, and each LCZ type is + associated with generic numerical descriptions of key urban canopy parameters critical + to model atmospheric responses to urbanisation. In addition, since LCZs were originally + designed as a new framework for urban heat island studies (Stewart and Oke, 2012), they + also contain a limited set (7) of 'natural' land-cover classes that can be used as 'control' + or 'natural reference' areas. As these seven natural classes in the LCZ scheme can not + capture the heterogeneity of the world’s existing natural ecosystems, we advise + users - if required - to combine the built LCZ classes with any other land-cover product that provides a wider range of natural land-cover classes. - + -------------------------- @@ -42,63 +42,63 @@ Licenses/restrictions placed on the data, or limitations of reuse: Creative Comm Recommended citations for the data: - - Demuzere M, Kittner J, Martilli A, et al. A global map of local climate zones to support - earth system modelling and urban-scale environmental science. Earth Syst Sci Data. + - Demuzere M, Kittner J, Martilli A, et al. A global map of local climate zones to support + earth system modelling and urban-scale environmental science. Earth Syst Sci Data. 2022a;14(8):3835-3873. doi:10.5194/essd-14-3835-2022 - Demuzere M, Kittner J, Martilli A, et al. Global Local Climate Zone map. Zenodo (2022b) doi:10.5281/zenodo.6364594. 
Other Notes: - Permission is hereby granted, free of charge, to any person obtaining a copy of this data - and associated documentation files, to use these data without restriction, + Permission is hereby granted, free of charge, to any person obtaining a copy of this data + and associated documentation files, to use these data without restriction, subject to the following: - - Understand that these data are created by crowd sourcing and machine learning and + - Understand that these data are created by crowd sourcing and machine learning and thus will naturally contain some errors. It comes as it is without any warranty. - - The above copyright notice and this permission notice shall be included in all copies or + - The above copyright notice and this permission notice shall be included in all copies or substantial portions of the data. - The authors and WUDAPT contributors are acknowledged where appropriate - For scientific use, the following papers could be cited to introduce the LCZ concepts: - * Stewart, I. D., & Oke, T. R. (2012). Local Climate Zones for Urban Temperature - Studies. Bulletin of the American Meteorological Society, 93(12), 1879–1900. + * Stewart, I. D., & Oke, T. R. (2012). Local Climate Zones for Urban Temperature + Studies. Bulletin of the American Meteorological Society, 93(12), 1879–1900. https://doi.org/10.1175/BAMS-D-11-00019.1 - * Bechtel B, Daneke C (2012) Classification of Local Climate Zones based on multiple - Earth Observation data. IEEE Journal of Selected Topics in Applied Earth + * Bechtel B, Daneke C (2012) Classification of Local Climate Zones based on multiple + Earth Observation data. IEEE Journal of Selected Topics in Applied Earth Observations and Remote Sensing 5:1191-1202 http://doi.org/10.1109/JSTARS.2012.2189873 - * Bechtel B., Alexander P.J., Böhner J., Ching J., Conrad O., Feddema J., Mills G., - See L., Stewart I. (2015) Mapping Local Climate Zones for a Worldwide Database - of the Form and Function of Cities. ISPRS International Journal of + * Bechtel B., Alexander P.J., Böhner J., Ching J., Conrad O., Feddema J., Mills G., + See L., Stewart I. (2015) Mapping Local Climate Zones for a Worldwide Database + of the Form and Function of Cities. ISPRS International Journal of Geo-Information 4:199-219. http://doi.org/10.3390/ijgi4010199 - * Ching, J., Mills, G., Bechtel, B., See, L., Feddema, J., - Wang, X., … Theeuwes, N. (2018). WUDAPT: An Urban Weather, Climate, and - Environmental Modeling Infrastructure for the Anthropocene. - Bulletin of the American Meteorological Society, 99(9), 1907–1924. + * Ching, J., Mills, G., Bechtel, B., See, L., Feddema, J., + Wang, X., … Theeuwes, N. (2018). WUDAPT: An Urban Weather, Climate, and + Environmental Modeling Infrastructure for the Anthropocene. + Bulletin of the American Meteorological Society, 99(9), 1907–1924. https://doi.org/10.1175/BAMS-D-16-0236.1 - * Bechtel, B., Alexander, P. J., Beck, C., Böhner, J., Brousse, O., - Ching, J., … Xu, Y. (2019). Generating WUDAPT Level 0 data – Current status of - production and evaluation. Urban Climate, 27, 24–45. + * Bechtel, B., Alexander, P. J., Beck, C., Böhner, J., Brousse, O., + Ching, J., … Xu, Y. (2019). Generating WUDAPT Level 0 data – Current status of + production and evaluation. Urban Climate, 27, 24–45. https://doi.org/10.1016/j.uclim.2018.10.001 - * Demuzere, M., Bechtel, B., & Mills, G. (2019). Global transferability of - local climate zone models. Urban Climate, 27, 46–63. + * Demuzere, M., Bechtel, B., & Mills, G. (2019). 
Global transferability of + local climate zone models. Urban Climate, 27, 46–63. https://doi.org/10.1016/j.uclim.2018.11.001 - * Demuzere, M., Bechtel, B., Middel, A., & Mills, G. (2019). Mapping Europe into - local climate zones. PLOS ONE, 14(4), e0214474. + * Demuzere, M., Bechtel, B., Middel, A., & Mills, G. (2019). Mapping Europe into + local climate zones. PLOS ONE, 14(4), e0214474. https://doi.org/10.1371/journal.pone.0214474 - * Bechtel B., Demuzere M., Stewart ID. (2020) A Weighted Accuracy Measure for - Land Cover Mapping: Comment on Johnson et al. Local Climate Zone (LCZ) Map - Accuracy Assessments Should Account for Land Cover Physical Characteristics - that Affect the Local Thermal Environment. Remote Sens. 2019, 11, 2420. - Remote Sens. 12(11):1769. + * Bechtel B., Demuzere M., Stewart ID. (2020) A Weighted Accuracy Measure for + Land Cover Mapping: Comment on Johnson et al. Local Climate Zone (LCZ) Map + Accuracy Assessments Should Account for Land Cover Physical Characteristics + that Affect the Local Thermal Environment. Remote Sens. 2019, 11, 2420. + Remote Sens. 12(11):1769. https://doi.org/10.3390/rs12111769 - * Demuzere M., Hankey S., Mills G., Zhang W., Lu T., Bechtel B. (2020) Combining - expert and crowd-sourced training data to map urban form and functions - for the continental US. Sci Data. 7(1):264. + * Demuzere M., Hankey S., Mills G., Zhang W., Lu T., Bechtel B. (2020) Combining + expert and crowd-sourced training data to map urban form and functions + for the continental US. Sci Data. 7(1):264. https://doi.org/10.1038/s41597-020-00605-z - * Demuzere M., Kittner J., Bechtel B. (2021) LCZ Generator: A Web Application - to Create Local Climate Zone Maps. Front Environ Sci. 9. + * Demuzere M., Kittner J., Bechtel B. (2021) LCZ Generator: A Web Application + to Create Local Climate Zone Maps. Front Environ Sci. 9. https://doi.org/10.3389/fenvs.2021.637455 @@ -110,13 +110,13 @@ Files and brief description: 1. lcz_filter_v2.tif - The recommended global LCZ map, compressed as GeoTIFF, with LCZ classes indicated by numbers 1-17. + The recommended global LCZ map, compressed as GeoTIFF, with LCZ classes indicated by numbers 1-17. LCZ labels are obtained after applying the morphological Gaussian filter described in Demuzere et al. (2020). The official color scheme - as indicated in the table below - is embedded into the GeoTIFF. - - The table below describes the class number | official class description | official hex color. - - LCZ 1 | Compact highrise | '#910613' + + The table below describes the class number | official class description | official hex color. + + LCZ 1 | Compact highrise | '#910613' LCZ 2 | Compact midrise | '#D9081C' LCZ 3 | Compact lowrise | '#FF0A22' LCZ 4 | Open highrise | '#C54F1E' @@ -134,7 +134,7 @@ Files and brief description: LCZ 16 (F) | Bare soil or sand | '#FCF7B1' LCZ 17 (G) | Water | '#656BFA' - + Other characteristics: - Projection: World Geodetic System 1984 (EPSG:4326) - Size: 389620, 155995 @@ -144,13 +144,13 @@ Files and brief description: 2. lcz_v2.tif - Same as 1., but presenting the raw LCZ map, before applying the morphological Gaussian filter. + Same as 1., but presenting the raw LCZ map, before applying the morphological Gaussian filter. 3. lcz_probability_v2.tif - A probability layer (%) that identifies how often the modal LCZ was chosen per pixel - (e.g. a probability of 60% means that the modal LCZ class was mapped 30 times out of + A probability layer (%) that identifies how often the modal LCZ was chosen per pixel + (e.g. 
a probability of 60% means that the modal LCZ class was mapped 30 times out of 50 LCZ models). This is a pixel-based probability, derived from the lcz_v2.tif map. -------------------- @@ -161,4 +161,3 @@ If there are there multiple versions of the dataset, list the file updated, when _v1: initial release of the data (20220222) _v2: add LCZ map of Iceland, consistent with the global LCZ map procedure (20221115) - diff --git a/temperature/pre_launch_scripts/era5_data/cdsapi_run.py b/temperature/pre_launch_scripts/era5_data/cdsapi_run.py index 08639a6..1620321 100644 --- a/temperature/pre_launch_scripts/era5_data/cdsapi_run.py +++ b/temperature/pre_launch_scripts/era5_data/cdsapi_run.py @@ -1,48 +1,78 @@ import cdsapi import argparse -parser = argparse.ArgumentParser(description='Year input') -parser.add_argument('year', metavar='Y', type=str, - help='the year of interest as a string') +parser = argparse.ArgumentParser(description="Year input") +parser.add_argument( + "year", metavar="Y", type=str, help="the year of interest as a string" +) args = parser.parse_args() c = cdsapi.Client() print(args) c.retrieve( - 'reanalysis-era5-land', + "reanalysis-era5-land", { - 'variable': '2m_temperature', - 'year': args.year, - 'month': [ - '01', '02', '03', - '04', '05', '06', - '07', '08', '09', - '10', '11', '12', + "variable": "2m_temperature", + "year": args.year, + "month": [ + "01", + "02", + "03", + "04", + "05", + "06", + "07", + "08", + "09", + "10", + "11", + "12", ], - 'day': [ - '01', '02', '03', - '04', '05', '06', - '07', '08', '09', - '10', '11', '12', - '13', '14', '15', - '16', '17', '18', - '19', '20', '21', - '22', '23', '24', - '25', '26', '27', - '28', '29', '30', - '31', + "day": [ + "01", + "02", + "03", + "04", + "05", + "06", + "07", + "08", + "09", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", ], - 'format': 'netcdf.zip', + "format": "netcdf.zip", # 'format': 'grib', - 'time': [ - '00:00', - '03:00', - '06:00', - '09:00', - '12:00', - '15:00', - '18:00', - '21:00', + "time": [ + "00:00", + "03:00", + "06:00", + "09:00", + "12:00", + "15:00", + "18:00", + "21:00", ], }, - '/mnt/share/erf/ERA5/three_hourly_temp_9km/{}_download.zip'.format(args.year)) \ No newline at end of file + "/mnt/share/erf/ERA5/three_hourly_temp_9km/{}_download.zip".format(args.year), +) diff --git a/temperature/pre_launch_scripts/era5_data/era5_data_hourly_to_daily.py b/temperature/pre_launch_scripts/era5_data/era5_data_hourly_to_daily.py index 7b79f7d..8ec004e 100644 --- a/temperature/pre_launch_scripts/era5_data/era5_data_hourly_to_daily.py +++ b/temperature/pre_launch_scripts/era5_data/era5_data_hourly_to_daily.py @@ -18,19 +18,19 @@ # Parse arguments parser = argparse.ArgumentParser() -parser.add_argument('file', type=str, help='filename') +parser.add_argument("file", type=str, help="filename") args = parser.parse_args() file = args.file -path_to_files = '/mnt/share/erf/ERA5/three_hourly_temp_9km/downloaded_unzipped/' -output_path = '/mnt/share/erf/ERA5/three_hourly_temp_9km/daily_temps/' +path_to_files = "/mnt/share/erf/ERA5/three_hourly_temp_9km/downloaded_unzipped/" +output_path = "/mnt/share/erf/ERA5/three_hourly_temp_9km/daily_temps/" get_name = file.split(".")[0] new_filename = get_name + "_daily.nc" -data = xr.open_dataset(path_to_files + file) # loads in data +data = xr.open_dataset(path_to_files + file) # loads in data print("Calculating daily averages from hourly 
data for " + get_name) -daily_data = data.resample(time='D').mean() # calculates daily averages +daily_data = data.resample(time="D").mean() # calculates daily averages print("Saving new daily averages as " + new_filename) -daily_data.to_netcdf(output_path + new_filename) \ No newline at end of file +daily_data.to_netcdf(output_path + new_filename) diff --git a/temperature/regional_predictions.py b/temperature/regional_predictions.py index 8c2585b..95906e1 100644 --- a/temperature/regional_predictions.py +++ b/temperature/regional_predictions.py @@ -12,7 +12,7 @@ # os.environ['PROJ_LIB'] = '/ihme/homes/nhashmeh/miniconda3/envs/earth-analytics-python/share/proj' # Output path (should make this an argument) -output_path = '/mnt/share/erf/ERA5/merged_era5_with_stations/grid_setup/predictions/' +output_path = "/mnt/share/erf/ERA5/merged_era5_with_stations/grid_setup/predictions/" task_id = int(sys.argv[1]) year = int(sys.argv[2]) @@ -26,79 +26,133 @@ list_of_files = [] -list_of_files = [os.path.join(path_to_chunk, file) for file in os.listdir(path_to_chunk) if file.split('_')[-1].split('.')[0] == str(year)] +list_of_files = [ + os.path.join(path_to_chunk, file) + for file in os.listdir(path_to_chunk) + if file.split("_")[-1].split(".")[0] == str(year) +] filename = list_of_files[task_id] print("Filename: ", filename) -new_filename = output_path + 'predictions_' + filename.split('/')[-1].split('.')[0] +new_filename = output_path + "predictions_" + filename.split("/")[-1].split(".")[0] # Load model -model = load('/mnt/share/code/nhashmeh/downscaling/temperature/validation_model_one_dummy_v3.joblib') -scaler = load('/mnt/share/code/nhashmeh/downscaling/temperature/validation_scaler_one_dummy_v3.joblib') +model = load( + "/mnt/share/code/nhashmeh/downscaling/temperature/validation_model_one_dummy_v3.joblib" +) +scaler = load( + "/mnt/share/code/nhashmeh/downscaling/temperature/validation_scaler_one_dummy_v3.joblib" +) # Loop through each chunk, then loop through each day in that file, then process a little bit before predictions loaded_data = pd.read_feather(filename) -num_unique_days = loaded_data['day_of_year'].nunique() -print(f"Number of unique days: {num_unique_days}") # there should be 365 or 366 -unique_days = loaded_data['day_of_year'].unique() +num_unique_days = loaded_data["day_of_year"].nunique() +print(f"Number of unique days: {num_unique_days}") # there should be 365 or 366 +unique_days = loaded_data["day_of_year"].unique() -final_data = pd.DataFrame() # this is where the output of each loop will be stored +final_data = pd.DataFrame() # this is where the output of each loop will be stored for day in unique_days: - - loaded_data_day = loaded_data[loaded_data['day_of_year'] == day] + loaded_data_day = loaded_data[loaded_data["day_of_year"] == day] # Convert longitude to -180 to 180 to match training data - loaded_data_day.loc[loaded_data_day['longitude_st'] > 180, 'longitude_st'] = loaded_data_day.loc[loaded_data_day['longitude_st'] > 180, 'longitude_st'] - 360 + loaded_data_day.loc[loaded_data_day["longitude_st"] > 180, "longitude_st"] = ( + loaded_data_day.loc[loaded_data_day["longitude_st"] > 180, "longitude_st"] - 360 + ) # Keeping a few columns to use later - lcz_original = loaded_data_day['lcz_value'] + lcz_original = loaded_data_day["lcz_value"] - print('converting lcz_value to dummies') - data_expanded = pd.get_dummies(loaded_data_day, columns=['lcz_value']) + print("converting lcz_value to dummies") + data_expanded = pd.get_dummies(loaded_data_day, columns=["lcz_value"]) - 
diff --git a/temperature/regional_predictions.py b/temperature/regional_predictions.py
index 8c2585b..95906e1 100644
--- a/temperature/regional_predictions.py
+++ b/temperature/regional_predictions.py
@@ -12,7 +12,7 @@
 # os.environ['PROJ_LIB'] = '/ihme/homes/nhashmeh/miniconda3/envs/earth-analytics-python/share/proj'
 
 # Output path (should make this an argument)
-output_path = '/mnt/share/erf/ERA5/merged_era5_with_stations/grid_setup/predictions/'
+output_path = "/mnt/share/erf/ERA5/merged_era5_with_stations/grid_setup/predictions/"
 
 task_id = int(sys.argv[1])
 year = int(sys.argv[2])
@@ -26,79 +26,133 @@
 
 list_of_files = []
 
-list_of_files = [os.path.join(path_to_chunk, file) for file in os.listdir(path_to_chunk) if file.split('_')[-1].split('.')[0] == str(year)]
+list_of_files = [
+    os.path.join(path_to_chunk, file)
+    for file in os.listdir(path_to_chunk)
+    if file.split("_")[-1].split(".")[0] == str(year)
+]
 
 filename = list_of_files[task_id]
 print("Filename: ", filename)
 
-new_filename = output_path + 'predictions_' + filename.split('/')[-1].split('.')[0]
+new_filename = output_path + "predictions_" + filename.split("/")[-1].split(".")[0]
 
 # Load model
-model = load('/mnt/share/code/nhashmeh/downscaling/temperature/validation_model_one_dummy_v3.joblib')
-scaler = load('/mnt/share/code/nhashmeh/downscaling/temperature/validation_scaler_one_dummy_v3.joblib')
+model = load(
+    "/mnt/share/code/nhashmeh/downscaling/temperature/validation_model_one_dummy_v3.joblib"
+)
+scaler = load(
+    "/mnt/share/code/nhashmeh/downscaling/temperature/validation_scaler_one_dummy_v3.joblib"
+)
 
 # Loop through each chunk, then loop through each day in that file, then process a little bit before predictions
 loaded_data = pd.read_feather(filename)
 
-num_unique_days = loaded_data['day_of_year'].nunique()
-print(f"Number of unique days: {num_unique_days}") # there should be 365 or 366
-unique_days = loaded_data['day_of_year'].unique()
+num_unique_days = loaded_data["day_of_year"].nunique()
+print(f"Number of unique days: {num_unique_days}")  # there should be 365 or 366
+unique_days = loaded_data["day_of_year"].unique()
 
-final_data = pd.DataFrame() # this is where the output of each loop will be stored
+final_data = pd.DataFrame()  # this is where the output of each loop will be stored
 
 for day in unique_days:
-
-    loaded_data_day = loaded_data[loaded_data['day_of_year'] == day]
+    loaded_data_day = loaded_data[loaded_data["day_of_year"] == day]
 
     # Convert longitude to -180 to 180 to match training data
-    loaded_data_day.loc[loaded_data_day['longitude_st'] > 180, 'longitude_st'] = loaded_data_day.loc[loaded_data_day['longitude_st'] > 180, 'longitude_st'] - 360
+    loaded_data_day.loc[loaded_data_day["longitude_st"] > 180, "longitude_st"] = (
+        loaded_data_day.loc[loaded_data_day["longitude_st"] > 180, "longitude_st"] - 360
+    )
 
     # Keeping a few columns to use later
-    lcz_original = loaded_data_day['lcz_value']
+    lcz_original = loaded_data_day["lcz_value"]
 
-    print('converting lcz_value to dummies')
-    data_expanded = pd.get_dummies(loaded_data_day, columns=['lcz_value'])
+    print("converting lcz_value to dummies")
+    data_expanded = pd.get_dummies(loaded_data_day, columns=["lcz_value"])
 
-    print('adding missing dummies and filling them with zeros')
+    print("adding missing dummies and filling them with zeros")
 
     # Get existing lcz_value columns
-    existing_lcz_columns = [col for col in data_expanded.columns if 'lcz_value_' in col]
+    existing_lcz_columns = [col for col in data_expanded.columns if "lcz_value_" in col]
 
     # Create a dataframe with dummy columns for missing lcz_values
-    missing_lcz_values = pd.DataFrame(columns=[f'lcz_value_{i}' for i in range(18) if f'lcz_value_{i}' not in existing_lcz_columns])
+    missing_lcz_values = pd.DataFrame(
+        columns=[
+            f"lcz_value_{i}"
+            for i in range(18)
+            if f"lcz_value_{i}" not in existing_lcz_columns
+        ]
+    )
 
     # Concatenate this with your original dataframe
     data_expanded = pd.concat([data_expanded, missing_lcz_values], axis=1)
 
     # Fill NaN values with 0 only in missing lcz_value columns
-    data_expanded[missing_lcz_values.columns] = data_expanded[missing_lcz_values.columns].fillna(0)
+    data_expanded[missing_lcz_values.columns] = data_expanded[
+        missing_lcz_values.columns
+    ].fillna(0)
 
     # Create 'day_of_year_sin' and 'day_of_year_cos' features (these will replace day_of_year)
-    data_expanded['day_of_year_sin'] = np.sin(2 * np.pi * data_expanded['day_of_year']/data_expanded['total_days_in_year'])
-    data_expanded['day_of_year_cos'] = np.cos(2 * np.pi * data_expanded['day_of_year']/data_expanded['total_days_in_year'])
+    data_expanded["day_of_year_sin"] = np.sin(
+        2 * np.pi * data_expanded["day_of_year"] / data_expanded["total_days_in_year"]
+    )
+    data_expanded["day_of_year_cos"] = np.cos(
+        2 * np.pi * data_expanded["day_of_year"] / data_expanded["total_days_in_year"]
+    )
 
     # match column order of training data (this drops day_of_year)
-    data_expanded = data_expanded[['latitude_st', 'longitude_st', 'elevation', 't2m', 'month', 'year',
-                                   'total_days_in_year', 'day_of_year_sin', 'day_of_year_cos',
-                                   'lcz_value_1', 'lcz_value_2', 'lcz_value_3', 'lcz_value_4', 'lcz_value_5', 'lcz_value_6', 'lcz_value_7',
-                                   'lcz_value_8', 'lcz_value_9', 'lcz_value_10', 'lcz_value_11', 'lcz_value_12', 'lcz_value_13',
-                                   'lcz_value_14', 'lcz_value_15','lcz_value_16', 'lcz_value_17']]
-
+    data_expanded = data_expanded[
+        [
+            "latitude_st",
+            "longitude_st",
+            "elevation",
+            "t2m",
+            "month",
+            "year",
+            "total_days_in_year",
+            "day_of_year_sin",
+            "day_of_year_cos",
+            "lcz_value_1",
+            "lcz_value_2",
+            "lcz_value_3",
+            "lcz_value_4",
+            "lcz_value_5",
+            "lcz_value_6",
+            "lcz_value_7",
+            "lcz_value_8",
+            "lcz_value_9",
+            "lcz_value_10",
+            "lcz_value_11",
+            "lcz_value_12",
+            "lcz_value_13",
+            "lcz_value_14",
+            "lcz_value_15",
+            "lcz_value_16",
+            "lcz_value_17",
+        ]
+    ]
+
     # sanity check: make sure there is only one unique month value...
-    month_store = data_expanded['month'].unique()
+    month_store = data_expanded["month"].unique()
 
     if len(month_store) > 1:
         print(month_store)
         print("More than one month value exists in data, ending loop")
         break
 
     # Predictions start here
-    print('Processing data for day of year ' + str(day) + ' (month, year: ' + str(month_store[0]) + ', ' + str(year) + ')')
-
+    print(
+        "Processing data for day of year "
+        + str(day)
+        + " (month, year: "
+        + str(month_store[0])
+        + ", "
+        + str(year)
+        + ")"
+    )
+
     X_test = scaler.transform(data_expanded)
 
     now1 = datetime.now()
-
+
     # Make predictions
     predictions = model.predict(X_test)
 
@@ -107,15 +161,16 @@
 
     print("Predictions complete, time elapsed: ", time_elapsed)
 
     # Trimming things down to what we want/need...
-    data_expanded_trim = data_expanded[['latitude_st', 'longitude_st', 'elevation', 't2m', 'month', 'year']].copy()
-    data_expanded_trim['day_of_year'] = day
-    data_expanded_trim['lcz_value'] = lcz_original
-    data_expanded_trim['predictions'] = predictions
+    data_expanded_trim = data_expanded[
+        ["latitude_st", "longitude_st", "elevation", "t2m", "month", "year"]
+    ].copy()
+    data_expanded_trim["day_of_year"] = day
+    data_expanded_trim["lcz_value"] = lcz_original
+    data_expanded_trim["predictions"] = predictions
 
     # Append the data for the current day to the larger DataFrame
     final_data = pd.concat([final_data, data_expanded_trim])
 
 # Save the final data
-final_data.reset_index(inplace = True)
-final_data.to_feather(new_filename + '.feather')
-
+final_data.reset_index(inplace=True)
+final_data.to_feather(new_filename + ".feather")
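Two feature-engineering steps in regional_predictions.py deserve a note. First, the dummy-column padding: pd.get_dummies only emits columns for the LCZ classes present in the current chunk, so the frame has to be aligned to the training-time column set before it reaches the scaler. A toy sketch of the same alignment (hypothetical data; the reindex idiom shown here is a compact equivalent of the concat-and-fillna approach in the patch):

```python
import pandas as pd

# Hypothetical chunk in which only two of the 17 LCZ classes occur.
df = pd.DataFrame({"t2m": [271.3, 280.1, 275.6], "lcz_value": [2, 14, 2]})
expanded = pd.get_dummies(df, columns=["lcz_value"])

# Pad to the fixed column set the model was trained on, filling absent
# classes with 0 so every chunk yields an identically shaped matrix.
all_columns = ["t2m"] + [f"lcz_value_{i}" for i in range(1, 18)]
expanded = expanded.reindex(columns=all_columns, fill_value=0)

print(expanded.shape)  # (3, 18): same width regardless of which classes appear
```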
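Second, the cyclical day-of-year encoding: the sin/cos pair maps the day onto a circle, so the end of December and the start of January end up close in feature space, which a raw ordinal day number cannot express. The same formula as the patch, in isolation on a toy frame:

```python
import numpy as np
import pandas as pd

df = pd.DataFrame({"day_of_year": [1, 92, 183, 274, 365], "total_days_in_year": 365})

angle = 2 * np.pi * df["day_of_year"] / df["total_days_in_year"]
df["day_of_year_sin"] = np.sin(angle)
df["day_of_year_cos"] = np.cos(angle)

# Day 365 and day 1 land at nearly the same (sin, cos) point, while the
# raw ordinal would place them 364 units apart.
print(df.round(3))
```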