Skip to content

Commit

Permalink
add notebook to check Herbie's CRS extraction matches pygrib
Browse files Browse the repository at this point in the history
  • Loading branch information
blaylockbk committed Jan 17, 2025
1 parent 9ffc5a3 commit 925abb5
Show file tree
Hide file tree
Showing 9 changed files with 375,082 additions and 6,669 deletions.
180 changes: 180 additions & 0 deletions check_pygrib_vs_herbie_crs_extraction.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Check CRS extraction\n",
"\n",
"I was having issues with pygrib running into segmentation faults in my GitHub Actions after it was updated to support Numpy v2+. I was only using pygrib to extract coordinate reference system (CRS) data, and I would like to remove it as a dependency.\n",
"\n",
"Here I am comparing the CRS proj parameters extracted by pygrib and what is extracted by Herbie (from the keys read by cfgrib). \n",
"\n",
"**Since pygrib doesn't work in GitHub actions for me for whatever reason, I should run this notebook before any release to manually test the values between pygrib and Herbie agree.**"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from herbie import Herbie\n",
"import pygrib\n",
"from pyproj import CRS\n",
"from herbie.crs import get_cf_crs"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Model: HRRR\n",
" pygrib {'a': 6371229, 'b': 6371229, 'lat_0': 38.5, 'lat_1': 38.5, 'lat_2': 38.5, 'lon_0': 262.5, 'proj': 'lcc'}\n",
" Herbie {'a': 6371229, 'b': 6371229, 'lat_0': 38.5, 'lat_1': 38.5, 'lat_2': 38.5, 'lon_0': 262.5, 'proj': 'lcc'}\n",
" equal= True\n",
"\n",
"Model: HRRRAK\n",
" pygrib {'a': 6371229, 'b': 6371229, 'lat_0': 90.0, 'lat_ts': 60.0, 'lon_0': 225.0, 'proj': 'stere'}\n",
" Herbie {'a': 6371229, 'b': 6371229, 'lat_0': 90, 'lat_ts': 60.0, 'lon_0': 225.0, 'proj': 'stere'}\n",
" equal= True\n",
"\n",
"Model: GFS\n",
" pygrib {'a': 6371229, 'b': 6371229, 'proj': 'longlat'}\n",
" Herbie {'a': 6371229, 'b': 6371229, 'proj': 'longlat'}\n",
" equal= True\n",
"👨🏻‍🏭 Created directory: [/home/blaylock/data/graphcast/20250101]\n",
"\n",
"Model: GRAPHCAST\n",
" pygrib {'a': 4326.0, 'b': 4326.0, 'proj': 'longlat'}\n",
" Herbie {'a': 4326.0, 'b': 4326.0, 'proj': 'longlat'}\n",
" equal= True\n",
"\n",
"Model: CFS\n",
" pygrib {'a': 6371229, 'b': 6371229, 'proj': 'longlat'}\n",
" Herbie {'a': 6371229, 'b': 6371229, 'proj': 'longlat'}\n",
" equal= True\n",
"\n",
"Model: IFS\n",
" pygrib {'a': 6371229, 'b': 6371229, 'proj': 'longlat'}\n",
" Herbie {'a': 6371229, 'b': 6371229, 'proj': 'longlat'}\n",
" equal= True\n",
"\n",
"Model: AIFS\n",
" pygrib {'a': 6371229, 'b': 6371229, 'proj': 'longlat'}\n",
" Herbie {'a': 6371229, 'b': 6371229, 'proj': 'longlat'}\n",
" equal= True\n",
"\n",
"Model: GEFS\n",
" pygrib {'a': 6371229, 'b': 6371229, 'proj': 'longlat'}\n",
" Herbie {'a': 6371229, 'b': 6371229, 'proj': 'longlat'}\n",
" equal= True\n",
"\n",
"Model: GEFS\n",
" pygrib {'a': 6371229, 'b': 6371229, 'proj': 'longlat'}\n",
" Herbie {'a': 6371229, 'b': 6371229, 'proj': 'longlat'}\n",
" equal= True\n",
"\n",
"Model: HAFSA\n",
" pygrib {'a': 6371229, 'b': 6371229, 'proj': 'longlat'}\n",
" Herbie {'a': 6371229, 'b': 6371229, 'proj': 'longlat'}\n",
" equal= True\n",
"\n",
"Model: HREF\n",
" pygrib {'a': 6371229, 'b': 6371229, 'lat_0': 25.0, 'lat_1': 25.0, 'lat_2': 25.0, 'lon_0': 265.0, 'proj': 'lcc'}\n",
" Herbie {'a': 6371229, 'b': 6371229, 'lat_0': 25.0, 'lat_1': 25.0, 'lat_2': 25.0, 'lon_0': 265.0, 'proj': 'lcc'}\n",
" equal= True\n",
"\n",
"Model: NAM\n",
" pygrib {'a': 6371229, 'b': 6371229, 'lat_0': 38.5, 'lat_1': 38.5, 'lat_2': 38.5, 'lon_0': 262.5, 'proj': 'lcc'}\n",
" Herbie {'a': 6371229, 'b': 6371229, 'lat_0': 38.5, 'lat_1': 38.5, 'lat_2': 38.5, 'lon_0': 262.5, 'proj': 'lcc'}\n",
" equal= True\n",
"\n",
"Model: URMA\n",
" pygrib {'a': 6371200.0, 'b': 6371200.0, 'lat_0': 25.0, 'lat_1': 25.0, 'lat_2': 25.0, 'lon_0': 265.0, 'proj': 'lcc'}\n",
" Herbie {'a': 6371200.0, 'b': 6371200.0, 'lat_0': 25.0, 'lat_1': 25.0, 'lat_2': 25.0, 'lon_0': 265.0, 'proj': 'lcc'}\n",
" equal= True\n",
"\n",
"Model: RTMA\n",
" pygrib {'a': 6371200.0, 'b': 6371200.0, 'lat_0': 25.0, 'lat_1': 25.0, 'lat_2': 25.0, 'lon_0': 265.0, 'proj': 'lcc'}\n",
" Herbie {'a': 6371200.0, 'b': 6371200.0, 'lat_0': 25.0, 'lat_1': 25.0, 'lat_2': 25.0, 'lon_0': 265.0, 'proj': 'lcc'}\n",
" equal= True\n"
]
}
],
"source": [
"for model, search in [\n",
" (dict(model=\"hrrr\"), \":TMP:2 m above\"),\n",
" (dict(model=\"hrrrak\"), \":TMP:2 m above\"),\n",
" (dict(model=\"gfs\"), \":TMP:2 m above\"),\n",
" (dict(model=\"graphcast\"), \":TMP:2 m above\"),\n",
" (dict(model=\"cfs\", member=1, product=\"6_hourly\", kind=\"flxf\"), \":TMP:2 m above\"),\n",
" (dict(model=\"ifs\"), \":2t:\"),\n",
" (dict(model=\"aifs\"), \":2t:\"),\n",
" (dict(model=\"gefs\", member=1), \":TMP:2 m above\"),\n",
" (dict(model=\"gefs\", product=\"wave\", member=1), \":WIND:surf\"),\n",
" (dict(model=\"hafsa\", product=\"storm.atm\", storm=\"07s\"), \":TMP:2 m above\"),\n",
" (dict(model=\"href\", products=\"mean\", domain=\"conus\", fxx=1), \":TMP:2 m above\"),\n",
" (dict(model=\"nam\"), \":TMP:2 m above\"),\n",
" (dict(model=\"urma\"), \":TMP:2 m above\"),\n",
" (dict(model=\"rtma\"), \":TMP:2 m above\"),\n",
"]:\n",
" if model[\"model\"] in (\"hafsa\", \"href\"): # only on nomads\n",
" date = \"2025-01-16 06:00\"\n",
" else:\n",
" date = \"2025-01-01\"\n",
"\n",
" ds = Herbie(date, verbose=False, **model).xarray(\n",
" search, remove_grib=False, _use_pygrib_for_crs=False\n",
" )\n",
"\n",
" print()\n",
" print(f\"Model: {model['model'].upper()}\")\n",
" with pygrib.open(str(ds.local_grib)) as grb:\n",
" msg = grb.message(1)\n",
" projparams_dict_pygrib = dict(sorted(msg.projparams.items()))\n",
" print(\" pygrib\", projparams_dict_pygrib)\n",
"\n",
" projparams_dict_herbie = dict(\n",
" sorted(get_cf_crs(ds, _return_projparams=True).items())\n",
" )\n",
" print(\" Herbie\", projparams_dict_herbie)\n",
" print(\" equal=\", projparams_dict_herbie == projparams_dict_pygrib)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "herbie-dev",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
5 changes: 5 additions & 0 deletions herbie/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1156,12 +1156,17 @@ def xarray(
backend_kwargs=backend_kwargs,
)

for ds in Hxr:
# Need model attribute before using get_cf_crs
ds.attrs["model"] = str(self.model)

# Get CF convention coordinate reference system (crs) information.
# NOTE: Assumes the projection is the same for all variables.
if _use_pygrib_for_crs:
# Get CF grid projection information with pygrib and pyproj because
# this is something cfgrib doesn't do (https://github.com/ecmwf/cfgrib/issues/251)
import pygrib

with pygrib.open(str(local_file)) as grb:
msg = grb.message(1)
cf_params = CRS(msg.projparams).to_cf()
Expand Down
133 changes: 99 additions & 34 deletions herbie/crs.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
"""CF convention coordinate reference system (CRS)."""
"""CF convention coordinate reference system (CRS).
Check out the notebook `check_pygrib_vs_herbie_crs_extraction.ipynb`
to test how Herbie and pygrib are extracting the CRS information.
"""

from typing import TYPE_CHECKING, Any

Expand All @@ -8,7 +12,9 @@
import xarray as xr


def get_cf_crs(ds: "xr.Dataset", variable: str | None = None) -> dict[str, Any]:
def get_cf_crs(
ds: "xr.Dataset", variable: str | None = None, _return_projparams=False
) -> dict[str, Any]:
"""
Extract the CF coordinate reference system (CRS) from a cfgrib xarray dataset.
Expand All @@ -31,16 +37,23 @@ def get_cf_crs(ds: "xr.Dataset", variable: str | None = None) -> dict[str, Any]:
# Earth assumed spherical with radius = 6 367 470.0 m
a = 6_367_470
b = 6_367_470
elif shapeOfTheEarth == 1:
elif shapeOfTheEarth == 1 and ds.attrs["model"] == "graphcast":
# Earth assumed spherical with radius specified (in m) by data producer
# TODO: Why is model='graphcast' using this value?
a = 4326.0
b = 4326.0
elif shapeOfTheEarth == 1 and ds.attrs["model"] in ["urma", "rtma"]:
# Earth assumed spherical with radius specified (in m) by data producer
# TODO: Why is urma and rtma using this value?
a = 6371200.0
b = 6371200.0
elif shapeOfTheEarth == 6:
# Earth assumed spherical with radius of 6,371,229.0 m
a = 6_371_229
b = 6_371_229

# Grid type definition
# https://codes.ecmwf.int/grib/format/grib2/ctables/3/1/
if da.GRIB_gridType == "lambert":
projparams = {"proj": "lcc"}
projparams["a"] = a
Expand All @@ -51,7 +64,12 @@ def get_cf_crs(ds: "xr.Dataset", variable: str | None = None) -> dict[str, Any]:
projparams["lat_2"] = da.GRIB_Latin2InDegrees

elif da.GRIB_gridType == "regular_ll":
projparams = {"proj": "latlong"}
projparams = {"proj": "longlat"}
projparams["a"] = a
projparams["b"] = b

elif da.GRIB_gridType == "regular_gg":
projparams = {"proj": "longlat"}
projparams["a"] = a
projparams["b"] = b

Expand All @@ -66,43 +84,90 @@ def get_cf_crs(ds: "xr.Dataset", variable: str | None = None) -> dict[str, Any]:
else:
raise NotImplementedError(f"gridType {da.GRIB_gridType} is not implemented.")

return CRS(projparams).to_cf()
if _return_projparams:
return projparams
else:
return CRS(projparams).to_cf()


"""
Look at how pygrib parses with this...
with pygrib.open(str(ds.local_grib)) as grb:
msg = grb.message(1)
print(msg.projparams)
Also, look at all keys with:
Also, look for clues by dumping all keys with:
grib_dump -j <filename.grib2> > filedump.json
HRRR
{'a': 6371229,
'b': 6371229,
'proj': 'lcc',
'lon_0': 262.5,
'lat_0': 38.5,
'lat_1': 38.5,
'lat_2': 38.5}
HRRR-Alaska
{'a': 6371229,
'b': 6371229,
'proj': 'stere',
'lat_ts': 60.0,
'lat_0': 90.0,
'lon_0': 225.0}
GFS
{'a': 6371229, 'b': 6371229, 'proj': 'longlat'}
NAM
{'a': 6371229,
'b': 6371229,
'proj': 'lcc',
'lon_0': 262.5,
'lat_0': 38.5,
'lat_1': 38.5,
'lat_2': 38.5}
-----------------------------------------
Model: HRRR
pygrib {'a': 6371229, 'b': 6371229, 'lat_0': 38.5, 'lat_1': 38.5, 'lat_2': 38.5, 'lon_0': 262.5, 'proj': 'lcc'}
Herbie {'a': 6371229, 'b': 6371229, 'lat_0': 38.5, 'lat_1': 38.5, 'lat_2': 38.5, 'lon_0': 262.5, 'proj': 'lcc'}
equal= True
Model: HRRRAK
pygrib {'a': 6371229, 'b': 6371229, 'lat_0': 90.0, 'lat_ts': 60.0, 'lon_0': 225.0, 'proj': 'stere'}
Herbie {'a': 6371229, 'b': 6371229, 'lat_0': 90, 'lat_ts': 60.0, 'lon_0': 225.0, 'proj': 'stere'}
equal= True
Model: GFS
pygrib {'a': 6371229, 'b': 6371229, 'proj': 'longlat'}
Herbie {'a': 6371229, 'b': 6371229, 'proj': 'longlat'}
equal= True
Model: GRAPHCAST
pygrib {'a': 4326.0, 'b': 4326.0, 'proj': 'longlat'}
Herbie {'a': 4326.0, 'b': 4326.0, 'proj': 'longlat'}
equal= True
Model: CFS
pygrib {'a': 6371229, 'b': 6371229, 'proj': 'longlat'}
Herbie {'a': 6371229, 'b': 6371229, 'proj': 'longlat'}
equal= True
Model: IFS
pygrib {'a': 6371229, 'b': 6371229, 'proj': 'longlat'}
Herbie {'a': 6371229, 'b': 6371229, 'proj': 'longlat'}
equal= True
Model: AIFS
pygrib {'a': 6371229, 'b': 6371229, 'proj': 'longlat'}
Herbie {'a': 6371229, 'b': 6371229, 'proj': 'longlat'}
equal= True
Model: GEFS
pygrib {'a': 6371229, 'b': 6371229, 'proj': 'longlat'}
Herbie {'a': 6371229, 'b': 6371229, 'proj': 'longlat'}
equal= True
Model: GEFS
pygrib {'a': 6371229, 'b': 6371229, 'proj': 'longlat'}
Herbie {'a': 6371229, 'b': 6371229, 'proj': 'longlat'}
equal= True
Model: HAFSA
pygrib {'a': 6371229, 'b': 6371229, 'proj': 'longlat'}
Herbie {'a': 6371229, 'b': 6371229, 'proj': 'longlat'}
equal= True
Model: HREF
pygrib {'a': 6371229, 'b': 6371229, 'lat_0': 25.0, 'lat_1': 25.0, 'lat_2': 25.0, 'lon_0': 265.0, 'proj': 'lcc'}
Herbie {'a': 6371229, 'b': 6371229, 'lat_0': 25.0, 'lat_1': 25.0, 'lat_2': 25.0, 'lon_0': 265.0, 'proj': 'lcc'}
equal= True
Model: NAM
pygrib {'a': 6371229, 'b': 6371229, 'lat_0': 38.5, 'lat_1': 38.5, 'lat_2': 38.5, 'lon_0': 262.5, 'proj': 'lcc'}
Herbie {'a': 6371229, 'b': 6371229, 'lat_0': 38.5, 'lat_1': 38.5, 'lat_2': 38.5, 'lon_0': 262.5, 'proj': 'lcc'}
equal= True
Model: URMA
pygrib {'a': 6371200.0, 'b': 6371200.0, 'lat_0': 25.0, 'lat_1': 25.0, 'lat_2': 25.0, 'lon_0': 265.0, 'proj': 'lcc'}
Herbie {'a': 6371200.0, 'b': 6371200.0, 'lat_0': 25.0, 'lat_1': 25.0, 'lat_2': 25.0, 'lon_0': 265.0, 'proj': 'lcc'}
equal= True
Model: RTMA
pygrib {'a': 6371200.0, 'b': 6371200.0, 'lat_0': 25.0, 'lat_1': 25.0, 'lat_2': 25.0, 'lon_0': 265.0, 'proj': 'lcc'}
Herbie {'a': 6371200.0, 'b': 6371200.0, 'lat_0': 25.0, 'lat_1': 25.0, 'lat_2': 25.0, 'lon_0': 265.0, 'proj': 'lcc'}
equal= True
"""
2 changes: 1 addition & 1 deletion herbie/models/gdps.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
## April 9, 2024

"""
A Herbie template for the GEM Global or Global Deterministic Prediction System (GDPS)
A Herbie template for the GEM Global or Global Deterministic Prediction System (GDPS).
Meteorological Service of Canada (MSC)
The GDPS is Canada's 15 km deterministic global model
Expand Down
2 changes: 1 addition & 1 deletion herbie/models/hrdps.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
## April 9, 2024

"""
A Herbie template for the High Resolution Deterministic Prediction System (HRDPS)
A Herbie template for the High Resolution Deterministic Prediction System (HRDPS).
Meteorological Service of Canada (MSC)
The HRDPS is Canada's 2.5 km deterministic model
Expand Down
Loading

0 comments on commit 925abb5

Please sign in to comment.