diff --git a/lumibot/data_sources/tradier_data.py b/lumibot/data_sources/tradier_data.py index 39d408402..9ff9db89e 100644 --- a/lumibot/data_sources/tradier_data.py +++ b/lumibot/data_sources/tradier_data.py @@ -1,12 +1,12 @@ import logging from collections import defaultdict -from datetime import datetime +from datetime import datetime, date import pandas as pd import pytz from lumibot.entities import Asset, Bars -from lumibot.tools.helpers import create_options_symbol, parse_timestep_qty_and_unit +from lumibot.tools.helpers import create_options_symbol, parse_timestep_qty_and_unit, get_trading_days from lumiwealth_tradier import Tradier from .data_source import DataSource @@ -202,6 +202,14 @@ def get_historical_prices( td, _ = self.convert_timestep_str_to_timedelta(timestep) start_date = end_date - (td * length) + if timestep == 'day' and timeshift is None: + # What we really want is the last n bars, not the bars from the last n days. + # get twice as many days as we need to ensure we get enough bars + tcal_start_date = end_date - (td * length * 2) + trading_days = get_trading_days(market='NYSE', start_date=tcal_start_date, end_date=end_date) + # Now, start_date is the length bars before the last trading day + start_date = trading_days.index[-length] + # Check what timestep we are using, different endpoints are required for different timesteps try: if parsed_timestep_unit == "minute": @@ -230,6 +238,10 @@ def get_historical_prices( if "timestamp" in df.columns: df = df.drop(columns=["timestamp"]) + # if type of index is date, convert it to timestamp with timezone info of "America/New_York" + if isinstance(df.index[0], date): + df.index = pd.to_datetime(df.index, utc=True).tz_convert("America/New_York") + # Convert the dataframe to a Bars object bars = Bars(df, self.SOURCE, asset, raw=df, quote=quote) diff --git a/tests/test_bars.py b/tests/test_bars.py index 239819919..78221e650 100644 --- a/tests/test_bars.py +++ b/tests/test_bars.py @@ -21,13 +21,26 @@ logger 
= logging.getLogger(__name__) -print_full_pandas_dataframes() -set_pandas_float_precision(precision=15) +# print_full_pandas_dataframes() +# set_pandas_float_precision(precision=15) -class TestBarsContainReturns: - """These tests check that the bars from get_historical_prices contain returns for the different data sources.""" +class TestDatasourceDailyBars: + """These tests check the Bars returned from get_historical_prices. + They test: + - the index is a timestamp + - they contain returns for the different data sources. + - they return the right number of bars + - returns are calculated correctly + - certain data_sources contain dividends + + """ + + length = 30 + ticker = "SPY" + asset = Asset("SPY") + timestep = "day" expected_df = None backtesting_start = datetime(2019, 3, 1) backtesting_end = datetime(2019, 3, 31) @@ -43,15 +56,21 @@ def setup_class(cls): df['expected_return'] = df['Adj Close'].pct_change() cls.expected_df = df + # @pytest.mark.skip() @pytest.mark.skipif(not ALPACA_CONFIG['API_KEY'], reason="This test requires an alpaca API key") @pytest.mark.skipif(ALPACA_CONFIG['API_KEY'] == '', reason="This test requires an alpaca API key") - def test_alpaca_data_source_generates_simple_returns(self): + def test_alpaca_data_source_daily_bars(self): """ - This tests that the alpaca data_source calculates SIMPLE returns for bars. Since we don't get dividends with - alpaca, we are not going to check if the returns are adjusted correctly. + Among other things, this tests that the alpaca data_source calculates SIMPLE returns for bars. + Since we don't get dividends with alpaca, we are not going to check if the returns are adjusted correctly. 
""" data_source = AlpacaData(ALPACA_CONFIG) - prices = data_source.get_historical_prices("SPY", 2, "day") + prices = data_source.get_historical_prices(asset=self.asset, length=self.length, timestep=self.timestep) + + assert isinstance(prices.df.index[0], pd.Timestamp) + # assert prices.df.index[0].tzinfo.zone == "America/New_York" # Note, this is different from all others + assert prices.df.index[0].tzinfo == pytz.timezone("UTC") + assert len(prices.df) == self.length assert isinstance(prices.df.index[0], pd.Timestamp) @@ -61,7 +80,8 @@ def test_alpaca_data_source_generates_simple_returns(self): # check that there is no dividend column... This test will fail when dividends are added. We hope that's soon. assert "dividend" not in prices.df.columns - def test_yahoo_data_source_generates_adjusted_returns(self): + # @pytest.mark.skip() + def test_yahoo_data_source_daily_bars(self): """ This tests that the yahoo data_source calculates adjusted returns for bars and that they are calculated correctly. 
@@ -69,7 +89,11 @@ def test_yahoo_data_source_generates_adjusted_returns(self): start = self.backtesting_start + timedelta(days=25) end = self.backtesting_end + timedelta(days=25) data_source = YahooData(datetime_start=start, datetime_end=end) - prices = data_source.get_historical_prices("SPY", 25, "day") + prices = data_source.get_historical_prices(asset=self.asset, length=self.length, timestep=self.timestep) + + assert isinstance(prices.df.index[0], pd.Timestamp) + assert prices.df.index[0].tzinfo.zone == "America/New_York" + assert len(prices.df) == self.length # assert that the last row has a return value assert prices.df["return"].iloc[-1] is not None @@ -106,7 +130,8 @@ def test_yahoo_data_source_generates_adjusted_returns(self): rtol=0 ) - def test_pandas_data_source_generates_adjusted_returns(self, pandas_data_fixture): + # @pytest.mark.skip() + def test_pandas_data_source_daily_bars(self, pandas_data_fixture): """ This tests that the pandas data_source calculates adjusted returns for bars and that they are calculated correctly. It assumes that it is provided split adjusted OHLCV and dividend data. @@ -118,10 +143,10 @@ def test_pandas_data_source_generates_adjusted_returns(self, pandas_data_fixture datetime_end=end, pandas_data=pandas_data_fixture ) - prices = data_source.get_historical_prices("SPY", 25, "day") - + prices = data_source.get_historical_prices(asset=self.asset, length=self.length, timestep=self.timestep) assert isinstance(prices.df.index[0], pd.Timestamp) - + assert prices.df.index[0].tzinfo.zone == "America/New_York" + assert len(prices.df) == self.length assert prices.df["return"].iloc[-1] is not None # check that there is a dividend column. 
@@ -156,8 +181,9 @@ def test_pandas_data_source_generates_adjusted_returns(self, pandas_data_fixture rtol=0 ) + # @pytest.mark.skip() @pytest.mark.skipif(POLYGON_API_KEY == '', reason="This test requires a Polygon.io API key") - def test_polygon_data_source_generates_simple_returns(self): + def test_polygon_data_source_daily_bars(self): """ This tests that the po broker calculates SIMPLE returns for bars. Since we don't get dividends with alpaca, we are not going to check if the returns are adjusted correctly. @@ -172,7 +198,11 @@ def test_polygon_data_source_generates_simple_returns(self): data_source = PolygonDataBacktesting( start, end, api_key=POLYGON_API_KEY ) - prices = data_source.get_historical_prices("SPY", 2, "day") + prices = data_source.get_historical_prices(asset=self.asset, length=self.length, timestep=self.timestep) + + assert isinstance(prices.df.index[0], pd.Timestamp) + assert prices.df.index[0].tzinfo.zone == "America/New_York" + assert len(prices.df) == self.length # assert that the last row has a return value assert prices.df["return"].iloc[-1] is not None @@ -190,8 +220,12 @@ def test_tradier_data_source_generates_simple_returns(self): access_token=TRADIER_CONFIG["ACCESS_TOKEN"], paper=TRADIER_CONFIG["PAPER"], ) - spy_asset = Asset("SPY") - prices = data_source.get_historical_prices(spy_asset, 2, "day") + + prices = data_source.get_historical_prices(asset=self.asset, length=self.length, timestep=self.timestep) + + assert isinstance(prices.df.index[0], pd.Timestamp) + assert prices.df.index[0].tzinfo.zone == "America/New_York" + assert len(prices.df) == self.length # This shows a bug. The index a datetime.date but should be a timestamp # assert isinstance(prices.df.index[0], pd.Timestamp)