From 65930271c2b90005bbbb18ac01c5c005d296069a Mon Sep 17 00:00:00 2001 From: Brett Elliot Date: Mon, 11 Nov 2024 14:24:16 -0500 Subject: [PATCH 1/5] fix conflict --- tests/test_bars.py | 60 +++++++++++++++++++++++++++++++++------------- 1 file changed, 43 insertions(+), 17 deletions(-) diff --git a/tests/test_bars.py b/tests/test_bars.py index 239819919..168243071 100644 --- a/tests/test_bars.py +++ b/tests/test_bars.py @@ -21,13 +21,26 @@ logger = logging.getLogger(__name__) -print_full_pandas_dataframes() -set_pandas_float_precision(precision=15) +# print_full_pandas_dataframes() +# set_pandas_float_precision(precision=15) -class TestBarsContainReturns: - """These tests check that the bars from get_historical_prices contain returns for the different data sources.""" +class TestDatasourceDailyBars: + """These tests check that the Barss returned from get_historical_prices. + They test: + - the index is a timestamp + - they contain returns for the different data sources. + - they return the right number of bars + - returns are calculated correctly + - certain datasources contain dividends + + """ + + length = 30 + ticker = "SPY" + asset = Asset("SPY") + timestep = "day" expected_df = None backtesting_start = datetime(2019, 3, 1) backtesting_end = datetime(2019, 3, 31) @@ -45,13 +58,16 @@ def setup_class(cls): @pytest.mark.skipif(not ALPACA_CONFIG['API_KEY'], reason="This test requires an alpaca API key") @pytest.mark.skipif(ALPACA_CONFIG['API_KEY'] == '', reason="This test requires an alpaca API key") - def test_alpaca_data_source_generates_simple_returns(self): + def test_alpaca_data_source_daily_bars(self): """ - This tests that the alpaca data_source calculates SIMPLE returns for bars. Since we don't get dividends with - alpaca, we are not going to check if the returns are adjusted correctly. + Among other things, this tests that the alpaca data_source calculates SIMPLE returns for bars. 
+ Since we don't get dividends with alpaca, we are not going to check if the returns are adjusted correctly. """ data_source = AlpacaData(ALPACA_CONFIG) - prices = data_source.get_historical_prices("SPY", 2, "day") + prices = data_source.get_historical_prices(asset=self.asset, length=self.length, timestep=self.timestep) + + assert isinstance(prices.df.index[0], pd.Timestamp) + assert len(prices.df) == self.length assert isinstance(prices.df.index[0], pd.Timestamp) @@ -61,7 +77,7 @@ def test_alpaca_data_source_generates_simple_returns(self): # check that there is no dividend column... This test will fail when dividends are added. We hope that's soon. assert "dividend" not in prices.df.columns - def test_yahoo_data_source_generates_adjusted_returns(self): + def test_yahoo_data_source_daily_bars(self): """ This tests that the yahoo data_source calculates adjusted returns for bars and that they are calculated correctly. @@ -69,7 +85,10 @@ def test_yahoo_data_source_generates_adjusted_returns(self): start = self.backtesting_start + timedelta(days=25) end = self.backtesting_end + timedelta(days=25) data_source = YahooData(datetime_start=start, datetime_end=end) - prices = data_source.get_historical_prices("SPY", 25, "day") + prices = data_source.get_historical_prices(asset=self.asset, length=self.length, timestep=self.timestep) + + assert isinstance(prices.df.index[0], pd.Timestamp) + assert len(prices.df) == self.length # assert that the last row has a return value assert prices.df["return"].iloc[-1] is not None @@ -106,7 +125,7 @@ def test_yahoo_data_source_generates_adjusted_returns(self): rtol=0 ) - def test_pandas_data_source_generates_adjusted_returns(self, pandas_data_fixture): + def test_pandas_data_source_daily_bars(self, pandas_data_fixture): """ This tests that the pandas data_source calculates adjusted returns for bars and that they are calculated correctly. It assumes that it is provided split adjusted OHLCV and dividend data. 
@@ -118,10 +137,10 @@ def test_pandas_data_source_generates_adjusted_returns(self, pandas_data_fixture datetime_end=end, pandas_data=pandas_data_fixture ) - prices = data_source.get_historical_prices("SPY", 25, "day") + prices = data_source.get_historical_prices(asset=self.asset, length=self.length, timestep=self.timestep) assert isinstance(prices.df.index[0], pd.Timestamp) - + assert len(prices.df) == self.length assert prices.df["return"].iloc[-1] is not None # check that there is a dividend column. @@ -157,7 +176,7 @@ def test_pandas_data_source_generates_adjusted_returns(self, pandas_data_fixture ) @pytest.mark.skipif(POLYGON_API_KEY == '', reason="This test requires a Polygon.io API key") - def test_polygon_data_source_generates_simple_returns(self): + def test_polygon_data_source_daily_bars(self): """ This tests that the po broker calculates SIMPLE returns for bars. Since we don't get dividends with alpaca, we are not going to check if the returns are adjusted correctly. @@ -172,7 +191,10 @@ def test_polygon_data_source_generates_simple_returns(self): data_source = PolygonDataBacktesting( start, end, api_key=POLYGON_API_KEY ) - prices = data_source.get_historical_prices("SPY", 2, "day") + prices = data_source.get_historical_prices(asset=self.asset, length=self.length, timestep=self.timestep) + + assert isinstance(prices.df.index[0], pd.Timestamp) + assert len(prices.df) == self.length # assert that the last row has a return value assert prices.df["return"].iloc[-1] is not None @@ -190,11 +212,15 @@ def test_tradier_data_source_generates_simple_returns(self): access_token=TRADIER_CONFIG["ACCESS_TOKEN"], paper=TRADIER_CONFIG["PAPER"], ) - spy_asset = Asset("SPY") - prices = data_source.get_historical_prices(spy_asset, 2, "day") + + prices = data_source.get_historical_prices(asset=self.asset, length=self.length, timestep=self.timestep) + + assert isinstance(prices.df.index[0], pd.Timestamp) + assert len(prices.df) == self.length # This shows a bug. 
The index a datetime.date but should be a timestamp # assert isinstance(prices.df.index[0], pd.Timestamp) # assert that the last row has a return value assert prices.df["return"].iloc[-1] is not None + From bbb1e3f153dc6f3ffdbb0d285066202e969f4de5 Mon Sep 17 00:00:00 2001 From: Brett Elliot Date: Thu, 7 Nov 2024 18:22:04 -0500 Subject: [PATCH 2/5] make sure the index of bars returned by tradier is a timestamp --- lumibot/data_sources/tradier_data.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lumibot/data_sources/tradier_data.py b/lumibot/data_sources/tradier_data.py index 39d408402..23f439651 100644 --- a/lumibot/data_sources/tradier_data.py +++ b/lumibot/data_sources/tradier_data.py @@ -1,6 +1,6 @@ import logging from collections import defaultdict -from datetime import datetime +from datetime import datetime, date import pandas as pd import pytz @@ -230,6 +230,10 @@ def get_historical_prices( if "timestamp" in df.columns: df = df.drop(columns=["timestamp"]) + # if type of index is date, convert it to datetime + if isinstance(df.index[0], date): + df.index = pd.to_datetime(df.index) + # Convert the dataframe to a Bars object bars = Bars(df, self.SOURCE, asset, raw=df, quote=quote) From e2f2dd56c226baf8d48a4139ee91b7733efedcab Mon Sep 17 00:00:00 2001 From: Brett Elliot Date: Thu, 7 Nov 2024 18:37:26 -0500 Subject: [PATCH 3/5] add tests for timezone --- lumibot/data_sources/tradier_data.py | 4 ++-- tests/test_bars.py | 7 ++++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/lumibot/data_sources/tradier_data.py b/lumibot/data_sources/tradier_data.py index 23f439651..9cfb7c624 100644 --- a/lumibot/data_sources/tradier_data.py +++ b/lumibot/data_sources/tradier_data.py @@ -230,9 +230,9 @@ def get_historical_prices( if "timestamp" in df.columns: df = df.drop(columns=["timestamp"]) - # if type of index is date, convert it to datetime + # if type of index is date, convert it to timestamp with timezone info of "America/New_York" 
if isinstance(df.index[0], date): - df.index = pd.to_datetime(df.index) + df.index = pd.to_datetime(df.index, utc=True).tz_convert("America/New_York") # Convert the dataframe to a Bars object bars = Bars(df, self.SOURCE, asset, raw=df, quote=quote) diff --git a/tests/test_bars.py b/tests/test_bars.py index 168243071..c134c32bb 100644 --- a/tests/test_bars.py +++ b/tests/test_bars.py @@ -67,6 +67,7 @@ def test_alpaca_data_source_daily_bars(self): prices = data_source.get_historical_prices(asset=self.asset, length=self.length, timestep=self.timestep) assert isinstance(prices.df.index[0], pd.Timestamp) + assert prices.df.index[0].tzinfo.zone == "America/New_York" assert len(prices.df) == self.length assert isinstance(prices.df.index[0], pd.Timestamp) @@ -88,6 +89,7 @@ def test_yahoo_data_source_daily_bars(self): prices = data_source.get_historical_prices(asset=self.asset, length=self.length, timestep=self.timestep) assert isinstance(prices.df.index[0], pd.Timestamp) + assert prices.df.index[0].tzinfo.zone == "America/New_York" assert len(prices.df) == self.length # assert that the last row has a return value @@ -138,8 +140,9 @@ def test_pandas_data_source_daily_bars(self, pandas_data_fixture): pandas_data=pandas_data_fixture ) prices = data_source.get_historical_prices(asset=self.asset, length=self.length, timestep=self.timestep) - + tz = pytz.timezone("America/New_York") assert isinstance(prices.df.index[0], pd.Timestamp) + assert prices.df.index[0].tzinfo.zone == "America/New_York" assert len(prices.df) == self.length assert prices.df["return"].iloc[-1] is not None @@ -194,6 +197,7 @@ def test_polygon_data_source_daily_bars(self): prices = data_source.get_historical_prices(asset=self.asset, length=self.length, timestep=self.timestep) assert isinstance(prices.df.index[0], pd.Timestamp) + assert prices.df.index[0].tzinfo.zone == "America/New_York" assert len(prices.df) == self.length # assert that the last row has a return value @@ -216,6 +220,7 @@ def 
test_tradier_data_source_generates_simple_returns(self): prices = data_source.get_historical_prices(asset=self.asset, length=self.length, timestep=self.timestep) assert isinstance(prices.df.index[0], pd.Timestamp) + assert prices.df.index[0].tzinfo.zone == "America/New_York" assert len(prices.df) == self.length # This shows a bug. The index a datetime.date but should be a timestamp From 1e36c365da6f0b07fdedcdd61b9fc55741c0c9ee Mon Sep 17 00:00:00 2001 From: Brett Elliot Date: Thu, 7 Nov 2024 20:03:00 -0500 Subject: [PATCH 4/5] check tzinfo; note alpaca tz is UTC which is different from all others --- tests/test_bars.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tests/test_bars.py b/tests/test_bars.py index c134c32bb..1e85e2555 100644 --- a/tests/test_bars.py +++ b/tests/test_bars.py @@ -26,14 +26,14 @@ class TestDatasourceDailyBars: - """These tests check that the Barss returned from get_historical_prices. + """These tests check that the Bars returned from get_historical_prices. They test: - the index is a timestamp - they contain returns for the different data sources. 
- they return the right number of bars - returns are calculated correctly - - certain datasources contain dividends + - certain data_sources contain dividends """ @@ -56,6 +56,7 @@ def setup_class(cls): df['expected_return'] = df['Adj Close'].pct_change() cls.expected_df = df + @pytest.mark.skip() @pytest.mark.skipif(not ALPACA_CONFIG['API_KEY'], reason="This test requires an alpaca API key") @pytest.mark.skipif(ALPACA_CONFIG['API_KEY'] == '', reason="This test requires an alpaca API key") def test_alpaca_data_source_daily_bars(self): @@ -67,7 +68,8 @@ def test_alpaca_data_source_daily_bars(self): prices = data_source.get_historical_prices(asset=self.asset, length=self.length, timestep=self.timestep) assert isinstance(prices.df.index[0], pd.Timestamp) - assert prices.df.index[0].tzinfo.zone == "America/New_York" + # assert prices.df.index[0].tzinfo.zone == "America/New_York" # Note, this is different from all others + assert prices.df.index[0].tzinfo == pytz.timezone("UTC") assert len(prices.df) == self.length assert isinstance(prices.df.index[0], pd.Timestamp) @@ -78,6 +80,7 @@ def test_alpaca_data_source_daily_bars(self): # check that there is no dividend column... This test will fail when dividends are added. We hope that's soon. 
assert "dividend" not in prices.df.columns + @pytest.mark.skip() def test_yahoo_data_source_daily_bars(self): """ This tests that the yahoo data_source calculates adjusted returns for bars and that they @@ -127,6 +130,7 @@ def test_yahoo_data_source_daily_bars(self): rtol=0 ) + @pytest.mark.skip() def test_pandas_data_source_daily_bars(self, pandas_data_fixture): """ This tests that the pandas data_source calculates adjusted returns for bars and that they @@ -140,7 +144,6 @@ def test_pandas_data_source_daily_bars(self, pandas_data_fixture): pandas_data=pandas_data_fixture ) prices = data_source.get_historical_prices(asset=self.asset, length=self.length, timestep=self.timestep) - tz = pytz.timezone("America/New_York") assert isinstance(prices.df.index[0], pd.Timestamp) assert prices.df.index[0].tzinfo.zone == "America/New_York" assert len(prices.df) == self.length @@ -178,6 +181,7 @@ def test_pandas_data_source_daily_bars(self, pandas_data_fixture): rtol=0 ) + @pytest.mark.skip() @pytest.mark.skipif(POLYGON_API_KEY == '', reason="This test requires a Polygon.io API key") def test_polygon_data_source_daily_bars(self): """ @@ -228,4 +232,3 @@ def test_tradier_data_source_generates_simple_returns(self): # assert that the last row has a return value assert prices.df["return"].iloc[-1] is not None - From 057b675e3c8e987665069938be07c0a1120e7ecd Mon Sep 17 00:00:00 2001 From: Brett Elliot Date: Thu, 7 Nov 2024 20:30:01 -0500 Subject: [PATCH 5/5] use a trading calendar to get the start date that is length bars earlier than end_date --- lumibot/data_sources/tradier_data.py | 10 +++++++++- tests/test_bars.py | 8 ++++---- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/lumibot/data_sources/tradier_data.py b/lumibot/data_sources/tradier_data.py index 9cfb7c624..9ff9db89e 100644 --- a/lumibot/data_sources/tradier_data.py +++ b/lumibot/data_sources/tradier_data.py @@ -6,7 +6,7 @@ import pytz from lumibot.entities import Asset, Bars -from lumibot.tools.helpers
import create_options_symbol, parse_timestep_qty_and_unit +from lumibot.tools.helpers import create_options_symbol, parse_timestep_qty_and_unit, get_trading_days from lumiwealth_tradier import Tradier from .data_source import DataSource @@ -202,6 +202,14 @@ def get_historical_prices( td, _ = self.convert_timestep_str_to_timedelta(timestep) start_date = end_date - (td * length) + if timestep == 'day' and timeshift is None: + # What we really want is the last n bars, not the bars from the last n days. + # get twice as many days as we need to ensure we get enough bars + tcal_start_date = end_date - (td * length * 2) + trading_days = get_trading_days(market='NYSE', start_date=tcal_start_date, end_date=end_date) + # Now, start_date is the length bars before the last trading day + start_date = trading_days.index[-length] + # Check what timestep we are using, different endpoints are required for different timesteps try: if parsed_timestep_unit == "minute": diff --git a/tests/test_bars.py b/tests/test_bars.py index 1e85e2555..78221e650 100644 --- a/tests/test_bars.py +++ b/tests/test_bars.py @@ -56,7 +56,7 @@ def setup_class(cls): df['expected_return'] = df['Adj Close'].pct_change() cls.expected_df = df - @pytest.mark.skip() + # @pytest.mark.skip() @pytest.mark.skipif(not ALPACA_CONFIG['API_KEY'], reason="This test requires an alpaca API key") @pytest.mark.skipif(ALPACA_CONFIG['API_KEY'] == '', reason="This test requires an alpaca API key") def test_alpaca_data_source_daily_bars(self): @@ -80,7 +80,7 @@ def test_alpaca_data_source_daily_bars(self): # check that there is no dividend column... This test will fail when dividends are added. We hope that's soon. 
assert "dividend" not in prices.df.columns - @pytest.mark.skip() + # @pytest.mark.skip() def test_yahoo_data_source_daily_bars(self): """ This tests that the yahoo data_source calculates adjusted returns for bars and that they @@ -130,7 +130,7 @@ def test_yahoo_data_source_daily_bars(self): rtol=0 ) - @pytest.mark.skip() + # @pytest.mark.skip() def test_pandas_data_source_daily_bars(self, pandas_data_fixture): """ This tests that the pandas data_source calculates adjusted returns for bars and that they @@ -181,7 +181,7 @@ def test_pandas_data_source_daily_bars(self, pandas_data_fixture): rtol=0 ) - @pytest.mark.skip() + # @pytest.mark.skip() @pytest.mark.skipif(POLYGON_API_KEY == '', reason="This test requires a Polygon.io API key") def test_polygon_data_source_daily_bars(self): """