Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix polygon get historical prices #673

Merged
merged 21 commits into from
Jan 18, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
43a833c
fix for bug in data.py for polygon data
brettelliot Nov 28, 2024
8835f18
Merge branch 'dev' into fix-polygon-get-historical-prices
brettelliot Dec 9, 2024
f05b327
Make test deterministic
brettelliot Dec 9, 2024
7525392
Added deterministic tests for thanksgiving dates.
brettelliot Dec 9, 2024
36bbf9e
make sure tests that require polygon subs are properly skipped
brettelliot Dec 10, 2024
deea225
Fixed bug in get_start_datetime_and_ts_unit; wasn't getting enough pa…
brettelliot Dec 11, 2024
e4ea8a1
Merge branch 'dev' into fix-polygon-get-historical-prices
brettelliot Dec 12, 2024
1a230d0
set the startdate far back enough to get N bars but not too far back
brettelliot Dec 12, 2024
e6393f6
Fix one of the polygon tests in test_polygon
brettelliot Dec 12, 2024
b4cf8e1
Merge branch 'dev' into fix-polygon-get-historical-prices
brettelliot Dec 16, 2024
99b064b
Change warning to debug message
brettelliot Dec 17, 2024
4e3f0fd
Merge branch 'dev' into fix-polygon-get-historical-prices
brettelliot Dec 18, 2024
1f73603
Merge branch 'dev' into fix-polygon-get-historical-prices
brettelliot Dec 18, 2024
354e004
fix division by zero bug when shorting
brettelliot Dec 19, 2024
86a6f78
better tests for figuring out the get_historical_prices problem
brettelliot Dec 19, 2024
568f877
tests for backtest broker getting data in the future; fix for yahoo d…
brettelliot Dec 20, 2024
03db9c1
Fixed bug so that covering short positions works in drift rebalancer.
brettelliot Jan 2, 2025
2bfe2c9
Merge branch 'dev' into fix-polygon-get-historical-prices
brettelliot Jan 2, 2025
29bb725
remove warning that wasn't true. If the drift is 1.0 or -1.0 we will …
brettelliot Jan 2, 2025
1faa9b9
missing_dates: re-enabled code that had been removed. Updated unit te…
davidlatte Jan 8, 2025
1ff9eaa
Merge remote-tracking branch 'origin/fix-polygon-get-historical-price…
davidlatte Jan 8, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 18 additions & 14 deletions lumibot/components/drift_rebalancer_logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,16 +132,6 @@ def __init__(

def calculate(self, target_weights: Dict[str, Decimal]) -> pd.DataFrame:

if self.drift_type == DriftType.ABSOLUTE:
# The absolute value of all the weights are less than the drift_threshold
# then we will never trigger a rebalance.

if all([abs(weight) < self.drift_threshold for weight in target_weights.values()]):
self.strategy.logger.warning(
f"All target weights are less than the drift_threshold: {self.drift_threshold}. "
f"No rebalance will be triggered."
)

self.df = pd.DataFrame({
"symbol": target_weights.keys(),
"is_quote_asset": False,
Expand Down Expand Up @@ -222,19 +212,23 @@ def _calculate_drift_row(self, row: pd.Series) -> Decimal:
return Decimal(0)

elif row["current_weight"] == Decimal(0) and row["target_weight"] == Decimal(0):
# Should nothing change?
# Do nothing
return Decimal(0)

elif row["current_quantity"] > Decimal(0) and row["target_weight"] == Decimal(0):
# Should we sell everything
# Sell everything
return Decimal(-1)

elif row["current_quantity"] < Decimal(0) and row["target_weight"] == Decimal(0):
# Cover our short position
return Decimal(1)

elif row["current_quantity"] == Decimal(0) and row["target_weight"] > Decimal(0):
# We don't have any of this asset, but we want to buy some.
return Decimal(1)

elif row["current_quantity"] == Decimal(0) and row["target_weight"] == Decimal(-1):
# Should we short everything we have
# Short everything we have
return Decimal(-1)

elif row["current_quantity"] == Decimal(0) and row["target_weight"] < Decimal(0):
Expand Down Expand Up @@ -352,7 +346,17 @@ def _rebalance(self, df: pd.DataFrame = None) -> None:

# Execute buys
for index, row in df.iterrows():
if row["drift"] > 0:
if row["drift"] == 1 and row['current_quantity'] < 0 and self.shorting:
# Cover our short position
symbol = row["symbol"]
quantity = abs(row["current_quantity"])
last_price = Decimal(self.strategy.get_last_price(symbol))
limit_price = self.calculate_limit_price(last_price=last_price, side="buy")
order = self.place_order(symbol=symbol, quantity=quantity, limit_price=limit_price, side="buy")
buy_orders.append(order)
cash_position -= quantity * limit_price

elif row["drift"] > 0:
symbol = row["symbol"]
last_price = Decimal(self.strategy.get_last_price(symbol))
limit_price = self.calculate_limit_price(last_price=last_price, side="buy")
Expand Down
7 changes: 3 additions & 4 deletions lumibot/data_sources/alpaca_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,10 +243,9 @@ def get_barset_from_api(self, asset, freq, limit=None, end=None, start=None, quo
loop_limit = limit

elif str(freq) == "1Day":
loop_limit = limit * 1.5 # number almost perfect for normal weeks where only weekends are off

# Add 3 days to the start date to make sure we get enough data on extra long weekends (like Thanksgiving)
loop_limit += 3
weeks_requested = limit // 5 # Full trading week is 5 days
extra_padding_days = weeks_requested * 3  # to account for 3-day weekends
loop_limit = max(5, limit + extra_padding_days) # Get at least 5 days

df = [] # to use len(df) below without an error

Expand Down
8 changes: 6 additions & 2 deletions lumibot/data_sources/pandas_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,8 +412,12 @@ def get_start_datetime_and_ts_unit(self, length, timestep, start_dt=None, start_
# Convert timestep string to timedelta and get start datetime
td, ts_unit = self.convert_timestep_str_to_timedelta(timestep)

# Multiply td by length to get the end datetime
td *= length
if ts_unit == "day":
weeks_requested = length // 5 # Full trading week is 5 days
extra_padding_days = weeks_requested * 3  # to account for 3-day weekends
td = timedelta(days=length + extra_padding_days)
else:
td *= length

if start_dt is not None:
start_datetime = start_dt - td
Expand Down
3 changes: 3 additions & 0 deletions lumibot/data_sources/yahoo_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,9 @@ def _pull_source_symbol_bars(
end = self._datetime.replace(second=59, microsecond=999999)

if timeshift:
# Ensure timeshift is a timedelta object
if isinstance(timeshift, int):
timeshift = timedelta(days=timeshift)
end = end - timeshift

end = self.to_default_timezone(end)
Expand Down
23 changes: 10 additions & 13 deletions lumibot/tools/polygon_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,19 +411,16 @@ def get_missing_dates(df_all, asset, start, end):
dates = pd.Series(df_all.index.date).unique()
missing_dates = sorted(set(trading_dates) - set(dates))

# TODO: This code works AFAIK, But when i enable it the tests for "test_polygon_missing_day_caching" and
# i don't know why nor how to fix this code or the tests. So im leaving it disabled for now. If you have problems
# with NANs in cached polygon data, you can try to enable this code and fix the tests.

# # Find any dates with nan values in the df_all DataFrame
# missing_dates += df_all[df_all.isnull().all(axis=1)].index.date.tolist()
#
# # make sure the dates are unique
# missing_dates = list(set(missing_dates))
# missing_dates.sort()
#
# # finally, filter out any dates that are not in start/end range (inclusive)
# missing_dates = [d for d in missing_dates if start.date() <= d <= end.date()]
# Find any dates with nan values in the df_all DataFrame. This happens for some infrequently traded assets, but
# it is difficult to know if the data is actually missing or if it is just infrequent trading, so query for it again.
missing_dates += df_all[df_all.isnull().all(axis=1)].index.date.tolist()

# make sure the dates are unique
missing_dates = list(set(missing_dates))
missing_dates.sort()

# finally, filter out any dates that are not in start/end range (inclusive)
missing_dates = [d for d in missing_dates if start.date() <= d <= end.date()]

return missing_dates

Expand Down
15 changes: 10 additions & 5 deletions tests/backtest/test_example_strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,7 @@

# Global parameters
# API Key for testing Polygon.io
POLYGON_API_KEY = os.environ.get("POLYGON_API_KEY")
POLYGON_IS_PAID_SUBSCRIPTION = os.getenv("POLYGON_IS_PAID_SUBSCRIPTION", "true").lower() not in {'false', '0', 'f', 'n', 'no'}

from lumibot.credentials import POLYGON_CONFIG

class TestExampleStrategies:
def test_stock_bracket(self):
Expand Down Expand Up @@ -208,7 +206,14 @@ def test_limit_and_trailing_stops(self):
assert round(results["total_return"] * 100, 1) >= 0.7
assert round(results["max_drawdown"]["drawdown"] * 100, 1) <= 0.2

@pytest.mark.skipif(POLYGON_API_KEY == '<your key here>', reason="This test requires a Polygon.io API key")
@pytest.mark.skipif(
not POLYGON_CONFIG["API_KEY"],
reason="This test requires a Polygon.io API key"
)
@pytest.mark.skipif(
POLYGON_CONFIG['API_KEY'] == '<your key here>',
reason="This test requires a Polygon.io API key"
)
def test_options_hold_to_expiry(self):
"""
Test the example strategy OptionsHoldToExpiry by running a backtest and checking that the strategy object is
Expand All @@ -227,7 +232,7 @@ def test_options_hold_to_expiry(self):
show_plot=False,
show_tearsheet=False,
save_tearsheet=False,
polygon_api_key=POLYGON_API_KEY,
polygon_api_key=POLYGON_CONFIG["API_KEY"],
)

trades_df = strat_obj.broker._trade_event_log_df
Expand Down
79 changes: 67 additions & 12 deletions tests/backtest/test_polygon.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@
from datetime import timedelta

# Global parameters
# API Key for testing Polygon.io
from lumibot.credentials import POLYGON_API_KEY
from lumibot.credentials import POLYGON_CONFIG


class PolygonBacktestStrat(Strategy):
Expand Down Expand Up @@ -204,7 +203,18 @@ def verify_backtest_results(self, poly_strat_obj):
)
assert "fill" not in poly_strat_obj.order_time_tracker[stoploss_order_id]

@pytest.mark.skipif(POLYGON_API_KEY == '<your key here>', reason="This test requires a Polygon.io API key")
@pytest.mark.skipif(
not POLYGON_CONFIG["API_KEY"],
reason="This test requires a Polygon.io API key"
)
@pytest.mark.skipif(
POLYGON_CONFIG['API_KEY'] == '<your key here>',
reason="This test requires a Polygon.io API key"
)
@pytest.mark.skipif(
not POLYGON_CONFIG["IS_PAID_SUBSCRIPTION"],
reason="This test requires a paid Polygon.io API key"
)
def test_polygon_restclient(self):
"""
Test Polygon REST Client with Lumibot Backtesting and real API calls to Polygon. Using the Amazon stock
Expand All @@ -219,7 +229,7 @@ def test_polygon_restclient(self):
data_source = PolygonDataBacktesting(
datetime_start=backtesting_start,
datetime_end=backtesting_end,
api_key=POLYGON_API_KEY,
api_key=POLYGON_CONFIG['API_KEY'],
)
broker = BacktestingBroker(data_source=data_source)
poly_strat_obj = PolygonBacktestStrat(
Expand All @@ -232,7 +242,18 @@ def test_polygon_restclient(self):
assert results
self.verify_backtest_results(poly_strat_obj)

@pytest.mark.skipif(POLYGON_API_KEY == '<your key here>', reason="This test requires a Polygon.io API key")
@pytest.mark.skipif(
not POLYGON_CONFIG["API_KEY"],
reason="This test requires a Polygon.io API key"
)
@pytest.mark.skipif(
POLYGON_CONFIG['API_KEY'] == '<your key here>',
reason="This test requires a Polygon.io API key"
)
@pytest.mark.skipif(
not POLYGON_CONFIG["IS_PAID_SUBSCRIPTION"],
reason="This test requires a paid Polygon.io API key"
)
def test_intraday_daterange(self):
tzinfo = pytz.timezone("America/New_York")
backtesting_start = datetime.datetime(2024, 2, 7).astimezone(tzinfo)
Expand All @@ -241,7 +262,7 @@ def test_intraday_daterange(self):
data_source = PolygonDataBacktesting(
datetime_start=backtesting_start,
datetime_end=backtesting_end,
api_key=POLYGON_API_KEY,
api_key=POLYGON_CONFIG['API_KEY'],
)
broker = BacktestingBroker(data_source=data_source)
poly_strat_obj = PolygonBacktestStrat(
Expand All @@ -256,7 +277,18 @@ def test_intraday_daterange(self):
# Assert the end datetime is before the market open of the next trading day.
assert broker.datetime == datetime.datetime.fromisoformat("2024-02-12 08:30:00-05:00")

@pytest.mark.skipif(POLYGON_API_KEY == '<your key here>', reason="This test requires a Polygon.io API key")
@pytest.mark.skipif(
not POLYGON_CONFIG["API_KEY"],
reason="This test requires a Polygon.io API key"
)
@pytest.mark.skipif(
POLYGON_CONFIG['API_KEY'] == '<your key here>',
reason="This test requires a Polygon.io API key"
)
@pytest.mark.skipif(
not POLYGON_CONFIG["IS_PAID_SUBSCRIPTION"],
reason="This test requires a paid Polygon.io API key"
)
def test_polygon_legacy_backtest(self):
"""
Do the same backtest as test_polygon_restclient() but using the legacy backtest() function call instead of
Expand All @@ -283,7 +315,18 @@ def test_polygon_legacy_backtest(self):
assert results
self.verify_backtest_results(poly_strat_obj)

@pytest.mark.skipif(POLYGON_API_KEY == '<your key here>', reason="This test requires a Polygon.io API key")
@pytest.mark.skipif(
not POLYGON_CONFIG["API_KEY"],
reason="This test requires a Polygon.io API key"
)
@pytest.mark.skipif(
POLYGON_CONFIG['API_KEY'] == '<your key here>',
reason="This test requires a Polygon.io API key"
)
@pytest.mark.skipif(
not POLYGON_CONFIG["IS_PAID_SUBSCRIPTION"],
reason="This test requires a paid Polygon.io API key"
)
def test_polygon_legacy_backtest2(self):
"""Test that the legacy backtest() function call works without returning the startegy object"""
# Parameters: True = Live Trading | False = Backtest
Expand All @@ -300,7 +343,7 @@ def test_polygon_legacy_backtest2(self):
show_plot=False,
show_tearsheet=False,
save_tearsheet=False,
polygon_api_key=POLYGON_API_KEY, # Testing the legacy parameter name while DeprecationWarning is active
polygon_api_key=POLYGON_CONFIG['API_KEY'], # Testing the legacy parameter name while DeprecationWarning is active
)
assert results

Expand Down Expand Up @@ -336,8 +379,9 @@ def test_pull_source_symbol_bars_with_api_call(self, polygon_data_backtesting, m
mocked_get_price_data.assert_called_once()
call_args = mocked_get_price_data.call_args

extra_padding_days = (length // 5) * 3
expected_start_date = polygon_data_backtesting.datetime_start - \
datetime.timedelta(days=length) - START_BUFFER
datetime.timedelta(days=length + extra_padding_days) - START_BUFFER

assert call_args[0][0] == polygon_data_backtesting._api_key
assert call_args[0][1] == asset
Expand All @@ -349,14 +393,25 @@ def test_pull_source_symbol_bars_with_api_call(self, polygon_data_backtesting, m

class TestPolygonDataSource:

@pytest.mark.skipif(POLYGON_API_KEY == '<your key here>', reason="This test requires a Polygon.io API key")
@pytest.mark.skipif(
not POLYGON_CONFIG["API_KEY"],
reason="This test requires a Polygon.io API key"
)
@pytest.mark.skipif(
POLYGON_CONFIG['API_KEY'] == '<your key here>',
reason="This test requires a Polygon.io API key"
)
@pytest.mark.skipif(
not POLYGON_CONFIG["IS_PAID_SUBSCRIPTION"],
reason="This test requires a paid Polygon.io API key"
)
def test_get_historical_prices(self):
tzinfo = pytz.timezone("America/New_York")
start = datetime.datetime(2024, 2, 5).astimezone(tzinfo)
end = datetime.datetime(2024, 2, 10).astimezone(tzinfo)

data_source = PolygonDataBacktesting(
start, end, api_key=POLYGON_API_KEY
start, end, api_key=POLYGON_CONFIG['API_KEY']
)
data_source._datetime = datetime.datetime(2024, 2, 7, 10).astimezone(tzinfo)
# This call will set make the data source use minute bars.
Expand Down
Loading
Loading