Skip to content

Commit

Permalink
herbie wait improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
karlwx committed May 28, 2024
1 parent 92a7b36 commit f7a9911
Show file tree
Hide file tree
Showing 3 changed files with 126 additions and 28 deletions.
118 changes: 100 additions & 18 deletions docs/user_guide/tutorial/latest.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -123,50 +123,132 @@
"source": [
"## `HerbieWait`\n",
"\n",
"I don't have the patience to actually try this, but if you want to wait for data to become available, Herbie will use a while loop to wait for data.\n",
"If you want to wait for model data to become available in real time, Herbie will use a while loop to wait for data.\n",
"\n",
"In this example, I've changed the default wait time and interval just to demonstrate. (The error in this is expected, but if you wait long enough it _should_ work.)\n"
"In this example, I've changed the default wait time and interval just to demonstrate. (The error in this is expected.)"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"💔 Did not find ┊ model=hrrr ┊ \u001b[3mproduct=sfc\u001b[0m ┊ \u001b[38;2;41;130;13m2024-Jan-19 19:00 UTC\u001b[92m F39\u001b[0m\n",
"💔 Did not find ┊ model=hrrr ┊ \u001b[3mproduct=sfc\u001b[0m ┊ \u001b[38;2;41;130;13m2024-Jan-19 19:00 UTC\u001b[92m F39\u001b[0m\n",
"💔 Did not find ┊ model=hrrr ┊ \u001b[3mproduct=sfc\u001b[0m ┊ \u001b[38;2;41;130;13m2024-Jan-19 19:00 UTC\u001b[92m F39\u001b[0m\n",
"💔 Did not find ┊ model=hrrr ┊ \u001b[3mproduct=sfc\u001b[0m ┊ \u001b[38;2;41;130;13m2024-Jan-19 19:00 UTC\u001b[92m F39\u001b[0m\n",
"💔 Did not find ┊ model=hrrr ┊ \u001b[3mproduct=sfc\u001b[0m ┊ \u001b[38;2;41;130;13m2024-Jan-19 19:00 UTC\u001b[92m F39\u001b[0m\n"
"💔 Did not find ┊ model=rap ┊ \u001b[3mproduct=awp130pgrb\u001b[0m ┊ \u001b[38;2;41;130;13m2024-May-21 17:00 UTC\u001b[92m F00\u001b[0m\n",
"💔 Did not find ┊ model=rap ┊ \u001b[3mproduct=awp130pgrb\u001b[0m ┊ \u001b[38;2;41;130;13m2024-May-21 17:00 UTC\u001b[92m F00\u001b[0m\n",
"💔 Did not find ┊ model=rap ┊ \u001b[3mproduct=awp130pgrb\u001b[0m ┊ \u001b[38;2;41;130;13m2024-May-21 17:00 UTC\u001b[92m F00\u001b[0m\n",
"💔 Did not find ┊ model=rap ┊ \u001b[3mproduct=awp130pgrb\u001b[0m ┊ \u001b[38;2;41;130;13m2024-May-21 17:00 UTC\u001b[92m F00\u001b[0m\n",
"💔 Did not find ┊ model=rap ┊ \u001b[3mproduct=awp130pgrb\u001b[0m ┊ \u001b[38;2;41;130;13m2024-May-21 17:00 UTC\u001b[92m F00\u001b[0m\n",
"💔 Did not find ┊ model=rap ┊ \u001b[3mproduct=awp130pgrb\u001b[0m ┊ \u001b[38;2;41;130;13m2024-May-21 17:00 UTC\u001b[92m F00\u001b[0m\n",
"💔 Did not find ┊ model=rap ┊ \u001b[3mproduct=awp130pgrb\u001b[0m ┊ \u001b[38;2;41;130;13m2024-May-21 17:00 UTC\u001b[92m F00\u001b[0m\n",
"💔 Did not find ┊ model=rap ┊ \u001b[3mproduct=awp130pgrb\u001b[0m ┊ \u001b[38;2;41;130;13m2024-May-21 17:00 UTC\u001b[92m F00\u001b[0m\n",
"💔 Did not find ┊ model=rap ┊ \u001b[3mproduct=awp130pgrb\u001b[0m ┊ \u001b[38;2;41;130;13m2024-May-21 17:00 UTC\u001b[92m F00\u001b[0m\n"
]
},
{
"ename": "TimeoutError",
"evalue": "Herbie did not find data in time: ║HERBIE╠ HRRR:sfc",
"evalue": "Herbie did not find data in time: ║HERBIE╠ RAP:awp130pgrb",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTimeoutError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[5], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mHerbieWait\u001b[49m\u001b[43m(\u001b[49m\u001b[43mwait_for\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m10s\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcheck_interval\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m1s\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfxx\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m39\u001b[39;49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/GITHUB/Herbie/herbie/latest.py:107\u001b[0m, in \u001b[0;36mHerbieWait\u001b[0;34m(model, priority, wait_for, check_interval, **kwargs)\u001b[0m\n\u001b[1;32m 104\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (pd\u001b[38;5;241m.\u001b[39mTimestamp(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnow\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;241m-\u001b[39m timer) \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mTimedelta(wait_for):\n\u001b[1;32m 105\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n\u001b[0;32m--> 107\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTimeoutError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mHerbie did not find data in time: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mH\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n",
"\u001b[0;31mTimeoutError\u001b[0m: Herbie did not find data in time: ║HERBIE╠ HRRR:sfc"
"Cell \u001b[0;32mIn[14], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m run \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mTimestamp(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnow\u001b[39m\u001b[38;5;124m\"\u001b[39m, tz\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mutc\u001b[39m\u001b[38;5;124m\"\u001b[39m)\u001b[38;5;241m.\u001b[39mfloor(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m1h\u001b[39m\u001b[38;5;124m'\u001b[39m)\u001b[38;5;241m.\u001b[39mreplace(tzinfo\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[0;32m----> 2\u001b[0m H \u001b[38;5;241m=\u001b[39m \u001b[43mHerbieWait\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrun\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrun\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrap\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mproduct\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mawp130pgrb\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mwait_for\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m10s\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcheck_interval\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m1s\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfxx\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/mapwall_dev/herbie-dev/herbie/latest.py:110\u001b[0m, in \u001b[0;36mHerbieWait\u001b[0;34m(run, model, priority, wait_for, check_interval, **kwargs)\u001b[0m\n\u001b[1;32m 108\u001b[0m \u001b[38;5;66;03m# Error out if timeout is exceeded\u001b[39;00m\n\u001b[1;32m 109\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (pd\u001b[38;5;241m.\u001b[39mTimestamp(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnow\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;241m-\u001b[39m timer) \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mTimedelta(wait_for):\n\u001b[0;32m--> 110\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTimeoutError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mHerbie did not find data in time: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mH\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 112\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m Herbie(run, model\u001b[38;5;241m=\u001b[39mmodel, priority\u001b[38;5;241m=\u001b[39mpriority, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
"\u001b[0;31mTimeoutError\u001b[0m: Herbie did not find data in time: ║HERBIE╠ RAP:awp130pgrb"
]
}
],
"source": [
"H = HerbieWait(wait_for=\"10s\", check_interval=\"1s\", fxx=39)"
"run = pd.Timestamp(\"now\", tz=\"utc\").floor('1h').replace(tzinfo=None)\n",
"H = HerbieWait(run=run, model=\"rap\", product=\"awp130pgrb\", wait_for=\"10s\", check_interval=\"1s\", fxx=0)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Here's an example that takes a while to run, demonstrating that it works!"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"💔 Did not find ┊ model=rap ┊ \u001b[3mproduct=awp130pgrb\u001b[0m ┊ \u001b[38;2;41;130;13m2024-May-21 17:00 UTC\u001b[92m F00\u001b[0m\n",
"💔 Did not find ┊ model=rap ┊ \u001b[3mproduct=awp130pgrb\u001b[0m ┊ \u001b[38;2;41;130;13m2024-May-21 17:00 UTC\u001b[92m F00\u001b[0m\n",
"💔 Did not find ┊ model=rap ┊ \u001b[3mproduct=awp130pgrb\u001b[0m ┊ \u001b[38;2;41;130;13m2024-May-21 17:00 UTC\u001b[92m F00\u001b[0m\n",
"💔 Did not find ┊ model=rap ┊ \u001b[3mproduct=awp130pgrb\u001b[0m ┊ \u001b[38;2;41;130;13m2024-May-21 17:00 UTC\u001b[92m F00\u001b[0m\n",
"💔 Did not find ┊ model=rap ┊ \u001b[3mproduct=awp130pgrb\u001b[0m ┊ \u001b[38;2;41;130;13m2024-May-21 17:00 UTC\u001b[92m F00\u001b[0m\n",
"💔 Did not find ┊ model=rap ┊ \u001b[3mproduct=awp130pgrb\u001b[0m ┊ \u001b[38;2;41;130;13m2024-May-21 17:00 UTC\u001b[92m F00\u001b[0m\n",
"💔 Did not find ┊ model=rap ┊ \u001b[3mproduct=awp130pgrb\u001b[0m ┊ \u001b[38;2;41;130;13m2024-May-21 17:00 UTC\u001b[92m F00\u001b[0m\n",
"💔 Did not find ┊ model=rap ┊ \u001b[3mproduct=awp130pgrb\u001b[0m ┊ \u001b[38;2;41;130;13m2024-May-21 17:00 UTC\u001b[92m F00\u001b[0m\n",
"💔 Did not find ┊ model=rap ┊ \u001b[3mproduct=awp130pgrb\u001b[0m ┊ \u001b[38;2;41;130;13m2024-May-21 17:00 UTC\u001b[92m F00\u001b[0m\n",
"💔 Did not find ┊ model=rap ┊ \u001b[3mproduct=awp130pgrb\u001b[0m ┊ \u001b[38;2;41;130;13m2024-May-21 17:00 UTC\u001b[92m F00\u001b[0m\n",
"💔 Did not find ┊ model=rap ┊ \u001b[3mproduct=awp130pgrb\u001b[0m ┊ \u001b[38;2;41;130;13m2024-May-21 17:00 UTC\u001b[92m F00\u001b[0m\n",
"💔 Did not find ┊ model=rap ┊ \u001b[3mproduct=awp130pgrb\u001b[0m ┊ \u001b[38;2;41;130;13m2024-May-21 17:00 UTC\u001b[92m F00\u001b[0m\n",
"💔 Did not find ┊ model=rap ┊ \u001b[3mproduct=awp130pgrb\u001b[0m ┊ \u001b[38;2;41;130;13m2024-May-21 17:00 UTC\u001b[92m F00\u001b[0m\n",
"💔 Did not find ┊ model=rap ┊ \u001b[3mproduct=awp130pgrb\u001b[0m ┊ \u001b[38;2;41;130;13m2024-May-21 17:00 UTC\u001b[92m F00\u001b[0m\n",
"💔 Did not find ┊ model=rap ┊ \u001b[3mproduct=awp130pgrb\u001b[0m ┊ \u001b[38;2;41;130;13m2024-May-21 17:00 UTC\u001b[92m F00\u001b[0m\n",
"💔 Did not find ┊ model=rap ┊ \u001b[3mproduct=awp130pgrb\u001b[0m ┊ \u001b[38;2;41;130;13m2024-May-21 17:00 UTC\u001b[92m F00\u001b[0m\n",
"✅ Found ┊ model=rap ┊ \u001b[3mproduct=awp130pgrb\u001b[0m ┊ \u001b[38;2;41;130;13m2024-May-21 17:00 UTC\u001b[92m F00\u001b[0m ┊ \u001b[38;2;255;153;0m\u001b[3mGRIB2 @ nomads\u001b[0m ┊ \u001b[38;2;255;153;0m\u001b[3mIDX @ nomads\u001b[0m\n",
"✅ Found ┊ model=rap ┊ \u001b[3mproduct=awp130pgrb\u001b[0m ┊ \u001b[38;2;41;130;13m2024-May-21 17:00 UTC\u001b[92m F00\u001b[0m ┊ \u001b[38;2;255;153;0m\u001b[3mGRIB2 @ nomads\u001b[0m ┊ \u001b[38;2;255;153;0m\u001b[3mIDX @ nomads\u001b[0m\n"
]
}
],
"source": [
"H = HerbieWait(run=run, model=\"rap\", product=\"awp130pgrb\", wait_for=\"2h\", check_interval=\"120s\", fxx=0)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\u001b[48;2;255;255;255m\u001b[38;2;136;33;27m▌\u001b[0m\u001b[38;2;12;53;118m\u001b[48;2;240;234;210m▌\u001b[38;2;0;0;0m\u001b[1mHerbie\u001b[0m RAP model \u001b[3mawp130pgrb\u001b[0m product initialized \u001b[38;2;41;130;13m2024-May-21 17:00 UTC\u001b[92m F00\u001b[0m ┊ \u001b[38;2;255;153;0m\u001b[3msource=nomads\u001b[0m"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"H"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "herbie",
"display_name": "herbie-dev",
"language": "python",
"name": "python3"
"name": "herbie-dev"
},
"language_info": {
"codemirror_mode": {
Expand All @@ -178,9 +260,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.2"
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}
25 changes: 15 additions & 10 deletions herbie/latest.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ def HerbieLatest(


def HerbieWait(
run=pd.Timestamp("now", tz="utc").floor('1h').replace(tzinfo=None),
model=config["default"].get("model"),
priority=["aws", "nomads"],
wait_for="5min",
Expand All @@ -64,6 +65,9 @@ def HerbieWait(
Parameters
----------
run : datetime or pandas.Timestamp
The model run to search for.
If not provided, the default value is the current UTC hour.
model : str
The name of the model.
priority : list
Expand All @@ -84,24 +88,25 @@ def HerbieWait(
**kwargs
Any other input you want passed to the Herbie class.
"""
now = pd.Timestamp.utcnow().floor("1h").tz_localize(None)

if isinstance(check_interval, str):
check_interval = pd.Timedelta(check_interval).total_seconds()

timer = pd.Timestamp("now")

H = Herbie(now, model=model, priority=priority, **kwargs)
H = Herbie(run, model=model, priority=priority, **kwargs)

# If H.grib does not exist, wait for it
while H.grib is None:
now = pd.Timestamp.utcnow().floor("1h").tz_localize(None)
H = Herbie(now, model=model, priority=priority, **kwargs)
if H.grib:
return H

# Wait for the specified check interval
time.sleep(check_interval)

if (pd.Timestamp("now") - timer) >= pd.Timedelta(wait_for):
# Try again; break out of loop if successful
H = Herbie(run, model=model, priority=priority, **kwargs)
if H.grib is not None:
break

raise TimeoutError(f"Herbie did not find data in time: {H}")
# Error out if timeout is exceeded
if (pd.Timestamp("now") - timer) >= pd.Timedelta(wait_for):
raise TimeoutError(f"Herbie did not find data in time: {H}")

return Herbie(run, model=model, priority=priority, **kwargs)
11 changes: 11 additions & 0 deletions tests/test_latest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
"""Tests HerbieWait."""

from herbie import HerbieWait
import pandas as pd
import pytest


def test_HerbieWait():
    """Check that HerbieWait raises TimeoutError when no data shows up in time.

    The requested run is the top of the *next* UTC hour (current time floored
    to the hour, plus one hour), expressed as a timezone-naive timestamp.
    NOTE(review): this presumes a model run one hour in the future is never
    published yet on any source -- confirm that assumption holds for RAP.
    """
    # Strip the timezone first so the value passed to HerbieWait is naive.
    current_hour = pd.Timestamp("now", tz="utc").replace(tzinfo=None).floor("1h")
    future_run = current_hour + pd.Timedelta("1h")

    # Short wait/check intervals keep the test quick; the return value is
    # irrelevant because the call is expected to raise before returning.
    with pytest.raises(TimeoutError):
        HerbieWait(
            future_run,
            model="rap",
            product="awp130pgrb",
            wait_for="5s",
            check_interval="1s",
            fxx=0,
        )

0 comments on commit f7a9911

Please sign in to comment.