Merge pull request #619 from Lumiwealth/get-bars-fixes

Lumiwealth · Nov 11, 2024 · 65880bc · 65880bc
2 parents 316fe65 + 057b675
commit 65880bc
Showing 2 changed files with 66 additions and 20 deletions.
diff --git a/lumibot/data_sources/tradier_data.py b/lumibot/data_sources/tradier_data.py
@@ -1,12 +1,12 @@
 import logging
 from collections import defaultdict
-from datetime import datetime
+from datetime import datetime, date
 
 import pandas as pd
 import pytz
 
 from lumibot.entities import Asset, Bars
-from lumibot.tools.helpers import create_options_symbol, parse_timestep_qty_and_unit
+from lumibot.tools.helpers import create_options_symbol, parse_timestep_qty_and_unit, get_trading_days
 from lumiwealth_tradier import Tradier
 
 from .data_source import DataSource
@@ -202,6 +202,14 @@ def get_historical_prices(
         td, _ = self.convert_timestep_str_to_timedelta(timestep)
         start_date = end_date - (td * length)
 
+        if timestep == 'day' and timeshift is None:
+            # What we really want is the last n bars, not the bars from the last n calendar days.
+            # Look back twice as many days as requested so the trading calendar should contain enough bars.
+            tcal_start_date = end_date - (td * length * 2)
+            trading_days = get_trading_days(market='NYSE', start_date=tcal_start_date, end_date=end_date)
+            # Now, start_date is the trading day that is `length` entries from the end of the trading calendar
+            start_date = trading_days.index[-length]
+
         # Check what timestep we are using, different endpoints are required for different timesteps
         try:
             if parsed_timestep_unit == "minute":
@@ -230,6 +238,10 @@ def get_historical_prices(
         if "timestamp" in df.columns:
             df = df.drop(columns=["timestamp"])
 
+        # if type of index is date, convert it to timestamp with timezone info of "America/New_York"
+        if isinstance(df.index[0], date):
+            df.index = pd.to_datetime(df.index, utc=True).tz_convert("America/New_York")
+
         # Convert the dataframe to a Bars object
         bars = Bars(df, self.SOURCE, asset, raw=df, quote=quote)
 

diff --git a/tests/test_bars.py b/tests/test_bars.py
@@ -21,13 +21,26 @@
 
 
 logger = logging.getLogger(__name__)
-print_full_pandas_dataframes()
-set_pandas_float_precision(precision=15)
+# print_full_pandas_dataframes()
+# set_pandas_float_precision(precision=15)
 
 
-class TestBarsContainReturns:
-    """These tests check that the bars from get_historical_prices contain returns for the different data sources."""
+class TestDatasourceDailyBars:
+    """These tests check the Bars returned from get_historical_prices.
 
+     They test:
+        - the index is a timestamp
+        - they contain returns for the different data sources.
+        - they return the right number of bars
+        - returns are calculated correctly
+        - certain data_sources contain dividends
+
+     """
+
+    length = 30
+    ticker = "SPY"
+    asset = Asset("SPY")
+    timestep = "day"
     expected_df = None
     backtesting_start = datetime(2019, 3, 1)
     backtesting_end = datetime(2019, 3, 31)
@@ -43,15 +56,21 @@ def setup_class(cls):
         df['expected_return'] = df['Adj Close'].pct_change()
         cls.expected_df = df
 
+    # @pytest.mark.skip()
     @pytest.mark.skipif(not ALPACA_CONFIG['API_KEY'], reason="This test requires an alpaca API key")
     @pytest.mark.skipif(ALPACA_CONFIG['API_KEY'] == '<your key here>', reason="This test requires an alpaca API key")
-    def test_alpaca_data_source_generates_simple_returns(self):
+    def test_alpaca_data_source_daily_bars(self):
         """
-        This tests that the alpaca data_source calculates SIMPLE returns for bars. Since we don't get dividends with
-        alpaca, we are not going to check if the returns are adjusted correctly.
+        Among other things, this tests that the alpaca data_source calculates SIMPLE returns for bars.
+        Since we don't get dividends with alpaca, we are not going to check if the returns are adjusted correctly.
         """
         data_source = AlpacaData(ALPACA_CONFIG)
-        prices = data_source.get_historical_prices("SPY", 2, "day")
+        prices = data_source.get_historical_prices(asset=self.asset, length=self.length, timestep=self.timestep)
+
+        assert isinstance(prices.df.index[0], pd.Timestamp)
+        # assert prices.df.index[0].tzinfo.zone == "America/New_York"  # Note, this is different from all others
+        assert prices.df.index[0].tzinfo == pytz.timezone("UTC")
+        assert len(prices.df) == self.length
 
         assert isinstance(prices.df.index[0], pd.Timestamp)
 
@@ -61,15 +80,20 @@ def test_alpaca_data_source_generates_simple_returns(self):
         # check that there is no dividend column... This test will fail when dividends are added. We hope that's soon.
         assert "dividend" not in prices.df.columns
 
-    def test_yahoo_data_source_generates_adjusted_returns(self):
+    # @pytest.mark.skip()
+    def test_yahoo_data_source_daily_bars(self):
         """
         This tests that the yahoo data_source calculates adjusted returns for bars and that they
         are calculated correctly.
         """
         start = self.backtesting_start + timedelta(days=25)
         end = self.backtesting_end + timedelta(days=25)
         data_source = YahooData(datetime_start=start, datetime_end=end)
-        prices = data_source.get_historical_prices("SPY", 25, "day")
+        prices = data_source.get_historical_prices(asset=self.asset, length=self.length, timestep=self.timestep)
+
+        assert isinstance(prices.df.index[0], pd.Timestamp)
+        assert prices.df.index[0].tzinfo.zone == "America/New_York"
+        assert len(prices.df) == self.length
 
         # assert that the last row has a return value
         assert prices.df["return"].iloc[-1] is not None
@@ -106,7 +130,8 @@ def test_yahoo_data_source_generates_adjusted_returns(self):
             rtol=0
         )
 
-    def test_pandas_data_source_generates_adjusted_returns(self, pandas_data_fixture):
+    # @pytest.mark.skip()
+    def test_pandas_data_source_daily_bars(self, pandas_data_fixture):
         """
         This tests that the pandas data_source calculates adjusted returns for bars and that they
         are calculated correctly. It assumes that it is provided split adjusted OHLCV and dividend data.
@@ -118,10 +143,10 @@ def test_pandas_data_source_generates_adjusted_returns(self, pandas_data_fixture
             datetime_end=end,
             pandas_data=pandas_data_fixture
         )
-        prices = data_source.get_historical_prices("SPY", 25, "day")
-
+        prices = data_source.get_historical_prices(asset=self.asset, length=self.length, timestep=self.timestep)
         assert isinstance(prices.df.index[0], pd.Timestamp)
-
+        assert prices.df.index[0].tzinfo.zone == "America/New_York"
+        assert len(prices.df) == self.length
         assert prices.df["return"].iloc[-1] is not None
 
         # check that there is a dividend column.
@@ -156,8 +181,9 @@ def test_pandas_data_source_generates_adjusted_returns(self, pandas_data_fixture
             rtol=0
         )
 
+    # @pytest.mark.skip()
     @pytest.mark.skipif(POLYGON_API_KEY == '<your key here>', reason="This test requires a Polygon.io API key")
-    def test_polygon_data_source_generates_simple_returns(self):
+    def test_polygon_data_source_daily_bars(self):
         """
         This tests that the polygon data source calculates SIMPLE returns for bars. Since we don't get dividends with
         polygon, we are not going to check if the returns are adjusted correctly.
@@ -172,7 +198,11 @@ def test_polygon_data_source_generates_simple_returns(self):
         data_source = PolygonDataBacktesting(
             start, end, api_key=POLYGON_API_KEY
         )
-        prices = data_source.get_historical_prices("SPY", 2, "day")
+        prices = data_source.get_historical_prices(asset=self.asset, length=self.length, timestep=self.timestep)
+
+        assert isinstance(prices.df.index[0], pd.Timestamp)
+        assert prices.df.index[0].tzinfo.zone == "America/New_York"
+        assert len(prices.df) == self.length
 
         # assert that the last row has a return value
         assert prices.df["return"].iloc[-1] is not None
@@ -190,8 +220,12 @@ def test_tradier_data_source_generates_simple_returns(self):
                 access_token=TRADIER_CONFIG["ACCESS_TOKEN"],
                 paper=TRADIER_CONFIG["PAPER"],
         )
-        spy_asset = Asset("SPY")
-        prices = data_source.get_historical_prices(spy_asset, 2, "day")
+
+        prices = data_source.get_historical_prices(asset=self.asset, length=self.length, timestep=self.timestep)
+
+        assert isinstance(prices.df.index[0], pd.Timestamp)
+        assert prices.df.index[0].tzinfo.zone == "America/New_York"
+        assert len(prices.df) == self.length
 
         # This shows a bug. The index is a datetime.date but should be a timestamp
         # assert isinstance(prices.df.index[0], pd.Timestamp)