Skip to content

Commit

Permalink
Merge pull request #606 from Lumiwealth/temp-revert-backtesting-optimizations
Browse files Browse the repository at this point in the history

Temp Revert backtesting optimizations
  • Loading branch information
grzesir authored Nov 1, 2024
2 parents f0a72d1 + de8e8c8 commit 95b2aa9
Show file tree
Hide file tree
Showing 5 changed files with 127 additions and 153 deletions.
68 changes: 18 additions & 50 deletions lumibot/backtesting/backtesting_broker.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,34 +117,20 @@ def should_continue(self):
def is_market_open(self):
"""Return True if market is open else false"""
now = self.datetime
increased_idx = False

if not hasattr(self, '_cached_idx_right'):
idx = self._trading_days.index.searchsorted(now, side='right')
self._cached_idx_right = idx
else:
if now > self._previous_market_close_right:
self._cached_idx_right += 1
increased_idx = True

idx = self._cached_idx_right

if hasattr(self, '_previous_market_close_right') and not increased_idx:
market_close = self._previous_market_close_right
market_open = self._previous_market_open_right
else:
if idx >= len(self._trading_days):
logging.error("Cannot predict future")
return 0
# As the index is sorted, use searchsorted to find the relevant day
idx = self._trading_days.index.searchsorted(now, side='right')

# The index of the trading_day is used as the market close time
market_close = self._trading_days.index[idx]
# Check that the index is not out of bounds
if idx >= len(self._trading_days):
logging.error("Cannot predict future")
return False

# Retrieve market open time using .at since idx is a valid datetime index
market_open = self._trading_days.at[market_close, 'market_open']
# The index of the trading_day is used as the market close time
market_close = self._trading_days.index[idx]

self._previous_market_open_right = market_open
self._previous_market_close_right = market_close
# Retrieve market open time using .at since idx is a valid datetime index
market_open = self._trading_days.at[market_close, 'market_open']

# Check if 'now' is within the trading hours of the located day
return market_open <= now < market_close
Expand Down Expand Up @@ -185,36 +171,18 @@ def get_time_to_open(self):
def get_time_to_close(self):
"""Return the remaining time for the market to close in seconds"""
now = self.datetime
increased_idx = False

# Use searchsorted for efficient searching and reduce unnecessary DataFrame access
if not hasattr(self, '_cached_idx'):
idx = self._trading_days.index.searchsorted(now, side='left')
self._cached_idx = idx
else:
if now > self._previous_market_close:
self._cached_idx += 1
increased_idx = True

idx = self._cached_idx

if hasattr(self, '_previous_market_close') and not increased_idx:
# Skip computation if done already
market_close = self._previous_market_close
market_open = self._previous_market_open

else:
if idx >= len(self._trading_days):
logging.error("Cannot predict future")
return 0
idx = self._trading_days.index.searchsorted(now, side='left')

# Directly access the data needed using more efficient methods
market_close_time = self._trading_days.index[idx]
market_open = self._trading_days.at[market_close_time, 'market_open']
market_close = market_close_time # Assuming this is a scalar value directly from the index
if idx >= len(self._trading_days):
logging.error("Cannot predict future")
return 0

self._previous_market_open = market_open
self._previous_market_close = market_close
# Directly access the data needed using more efficient methods
market_close_time = self._trading_days.index[idx]
market_open = self._trading_days.at[market_close_time, 'market_open']
market_close = market_close_time # Assuming this is a scalar value directly from the index

if now < market_open:
return None
Expand Down
115 changes: 80 additions & 35 deletions lumibot/entities/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from .asset import Asset
from .dataline import Dataline
from functools import lru_cache


class Data:
"""Input and manage Pandas dataframes for backtesting.
Expand Down Expand Up @@ -278,16 +278,19 @@ def repair_times_and_fill(self, idx):
idx = idx[(idx >= self.datetime_start) & (idx <= self.datetime_end)]

# After all time series merged, adjust the local dataframe to reindex and fill nan's.
self.df = self.df.reindex(idx, method="ffill")
self.df.loc[self.df["volume"].isna(), "volume"] = 0
self.df.loc[:, ~self.df.columns.isin(["open", "high", "low"])] = self.df.loc[
:, ~self.df.columns.isin(["open", "high", "low"])
df = self.df.reindex(idx, method="ffill")
df.loc[df["volume"].isna(), "volume"] = 0
df.loc[:, ~df.columns.isin(["open", "high", "low"])] = df.loc[
:, ~df.columns.isin(["open", "high", "low"])
].ffill()
for col in ["open", "high", "low"]:
self.df.loc[self.df[col].isna(), col] = self.df.loc[self.df[col].isna(), "close"]
df.loc[df[col].isna(), col] = df.loc[df[col].isna(), "close"]

self.df = df

iter_index = pd.Series(self.df.index)
iter_index = pd.Series(df.index)
self.iter_index = pd.Series(iter_index.index, index=iter_index)
self.iter_index_dict = self.iter_index.to_dict()

self.datalines = dict()
self.to_datalines()
Expand All @@ -298,7 +301,8 @@ def to_datalines(self):
"datetime": Dataline(
self.asset,
"datetime",
self.df.index
self.df.index.to_numpy(),
self.df.index.dtype,
)
}
)
Expand All @@ -310,33 +314,74 @@ def to_datalines(self):
column: Dataline(
self.asset,
column,
self.df[column]
)
}
)
self.df[column].to_numpy(),
self.df[column].dtype,
)
}
)
setattr(self, column, self.datalines[column].dataline)


def get_iter_count(self, dt):
# Return the index location for a given datetime.

# Check if the date is in the dataframe, if not then get the last
# known data (this speeds up the process)
i = None

# Check if we have the iter_index_dict, if not then repair the times and fill (which will create the iter_index_dict)
if getattr(self, "iter_index_dict", None) is None:
self.repair_times_and_fill(self.df.index)

# Search for dt in self.iter_index_dict
if dt in self.iter_index_dict:
i = self.iter_index_dict[dt]
else:
# If not found, get the last known data
i = self.iter_index.asof(dt)

return i

def check_data(func):
# Validates if the provided date, length, timeshift, and timestep
# will return data. Runs function if data, returns None if no data.
def checker(self, *args, **kwargs):
if type(kwargs.get("length", 1)) not in [int, float]:
raise TypeError(f"Length must be an integer. {type(kwargs.get('length', 1))} was provided.")

dt = args[0]

# Check if the iter date is outside of this data's date range.
if dt < self.datetime_start:
raise ValueError(
f"The date you are looking for ({dt}) for ({self.asset}) is outside of the data's date range ({self.datetime_start} to {self.datetime_end}). This could be because the data for this asset does not exist for the date you are looking for, or something else."
)

# Search for dt in self.iter_index
if getattr(self, "iter_index", None) is None:
# Search for dt in self.iter_index_dict
if getattr(self, "iter_index_dict", None) is None:
self.repair_times_and_fill(self.df.index)

if dt in self.iter_index_dict:
i = self.iter_index_dict[dt]
else:
# If not found, get the last known data
i = self.iter_index.asof(dt)

length = kwargs.get("length", 1)
timeshift = kwargs.get("timeshift", 0)
data_index = i + 1 - length - timeshift
is_data = data_index >= 0
if not is_data:
# Log a warning
logging.warning(
f"The date you are looking for ({dt}) is outside of the data's date range ({self.datetime_start} to {self.datetime_end}) after accounting for a length of {kwargs.get('length', 1)} and a timeshift of {kwargs.get('timeshift', 0)}. Keep in mind that the length you are requesting must also be available in your data, in this case we are {data_index} rows away from the data you need."
)

res = func(self, *args, **kwargs)
# print(f"Results last price: {res}")
return res

return checker

@lru_cache(maxsize=32)
@check_data
def get_iter_count(self, dt):
return self.iter_index.index.searchsorted(dt, side='right') - 1

@check_data
def get_last_price(self, dt, length=1, timeshift=0):
"""Returns the last known price of the data.
Expand All @@ -357,8 +402,8 @@ def get_last_price(self, dt, length=1, timeshift=0):
float
"""
iter_count = self.get_iter_count(dt)
open_price = self.datalines["open"].dataline.iloc[iter_count]
close_price = self.datalines["close"].dataline.iloc[iter_count]
open_price = self.datalines["open"].dataline[iter_count]
close_price = self.datalines["close"].dataline[iter_count]
price = close_price if dt > self.datalines["datetime"].dataline[iter_count] else open_price
return price

Expand All @@ -382,19 +427,19 @@ def get_quote(self, dt, length=1, timeshift=0):
dict
"""
iter_count = self.get_iter_count(dt)
open = round(self.datalines["open"].dataline.iloc[iter_count], 2)
high = round(self.datalines["high"].dataline.iloc[iter_count], 2)
low = round(self.datalines["low"].dataline.iloc[iter_count], 2)
close = round(self.datalines["close"].dataline.iloc[iter_count], 2)
bid = round(self.datalines["bid"].dataline.iloc[iter_count], 2)
ask = round(self.datalines["ask"].dataline.iloc[iter_count], 2)
volume = round(self.datalines["volume"].dataline.iloc[iter_count], 0)
bid_size = round(self.datalines["bid_size"].dataline.iloc[iter_count], 0)
bid_condition = round(self.datalines["bid_condition"].dataline.iloc[iter_count], 0)
bid_exchange = round(self.datalines["bid_exchange"].dataline.iloc[iter_count], 0)
ask_size = round(self.datalines["ask_size"].dataline.iloc[iter_count], 0)
ask_condition = round(self.datalines["ask_condition"].dataline.iloc[iter_count], 0)
ask_exchange = round(self.datalines["ask_exchange"].dataline.iloc[iter_count], 0)
open = round(self.datalines["open"].dataline[iter_count], 2)
high = round(self.datalines["high"].dataline[iter_count], 2)
low = round(self.datalines["low"].dataline[iter_count], 2)
close = round(self.datalines["close"].dataline[iter_count], 2)
bid = round(self.datalines["bid"].dataline[iter_count], 2)
ask = round(self.datalines["ask"].dataline[iter_count], 2)
volume = round(self.datalines["volume"].dataline[iter_count], 0)
bid_size = round(self.datalines["bid_size"].dataline[iter_count], 0)
bid_condition = round(self.datalines["bid_condition"].dataline[iter_count], 0)
bid_exchange = round(self.datalines["bid_exchange"].dataline[iter_count], 0)
ask_size = round(self.datalines["ask_size"].dataline[iter_count], 0)
ask_condition = round(self.datalines["ask_condition"].dataline[iter_count], 0)
ask_exchange = round(self.datalines["ask_exchange"].dataline[iter_count], 0)

return {
"open": open,
Expand Down
5 changes: 3 additions & 2 deletions lumibot/entities/dataline.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
class Dataline:
def __init__(self, asset, name, dataline):
def __init__(self, asset, name, dataline, dtype):
self.asset = asset
self.name = name
self.dataline = dataline
self.dataline = dataline
self.dtype = dtype
5 changes: 1 addition & 4 deletions lumibot/strategies/strategy_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -1032,10 +1032,7 @@ def run(self):
market = self.broker.market

# Get the trading days based on the market that the strategy is trading on
if self.strategy.is_backtesting:
self.broker._trading_days = get_trading_days(market=market, start_date=self.strategy._backtesting_start)
else:
self.broker._trading_days = get_trading_days(market)
self.broker._trading_days = get_trading_days(market)

# Sort the trading days by market close time so that we can search them faster
self.broker._trading_days.sort_values('market_close', inplace=True) # Ensure sorted order
Expand Down
Loading

0 comments on commit 95b2aa9

Please sign in to comment.