updated elegantrl agents for latest elegantrl release (#1154)
* updated elegantrl agents for latest elegantrl release

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: krish-athenasoft <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
3 people authored Jan 15, 2024
1 parent c0f42fb commit 34a5a4b
Showing 1 changed file with 100 additions and 63 deletions.
163 changes: 100 additions & 63 deletions finrl/agents/elegantrl/models.py
@@ -4,17 +4,17 @@
from __future__ import annotations

import torch
from elegantrl.agents import AgentDDPG
from elegantrl.agents import AgentPPO
from elegantrl.agents import AgentSAC
from elegantrl.agents import AgentTD3
from elegantrl.train.config import Arguments
from elegantrl.train.run import init_agent
from elegantrl.train.run import train_and_evaluate
from elegantrl.agents import *
from elegantrl.train.config import Config
from elegantrl.train.run import train_agent

# from elegantrl.agents import AgentA2C

MODELS = {"ddpg": AgentDDPG, "td3": AgentTD3, "sac": AgentSAC, "ppo": AgentPPO}
MODELS = {
"ddpg": AgentDDPG,
"td3": AgentTD3,
"sac": AgentSAC,
"ppo": AgentPPO,
"a2c": AgentA2C,
}
OFF_POLICY_MODELS = ["ddpg", "td3", "sac"]
ON_POLICY_MODELS = ["ppo"]
# MODEL_KWARGS = {x: config.__dict__[f"{x.upper()}_PARAMS"] for x in MODELS.keys()}
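For scripts that call ElegantRL directly, the entry points used here are `Config` and `train_agent`, which replace the previous `Arguments`, `init_agent`, and `train_and_evaluate`. A minimal sketch of the new calling convention follows; the `StockTradingEnv` import path, the dummy arrays, and the hyperparameter values are illustrative assumptions rather than part of this file.

import numpy as np
from elegantrl.agents import AgentPPO
from elegantrl.train.config import Config
from elegantrl.train.run import train_agent
# Assumed FinRL environment path; substitute your own gym-style env class.
from finrl.meta.env_stock_trading.env_stocktrading_np import StockTradingEnv

# Dummy market data: 1,000 steps, 30 tickers, 240 technical-indicator columns.
price_array = np.random.rand(1000, 30) + 1.0
tech_array = np.random.rand(1000, 240)
turbulence_array = np.random.rand(1000)

env_args = {
    "env_name": "StockEnv",
    "state_dim": 1 + 2 + 3 * 30 + 240,  # = 333
    "action_dim": 30,
    "if_discrete": False,
    "max_step": price_array.shape[0] - 1,
    "config": {
        "price_array": price_array,
        "tech_array": tech_array,
        "turbulence_array": turbulence_array,
        "if_train": True,
    },
}

args = Config(agent_class=AgentPPO, env_class=StockTradingEnv, env_args=env_args)
args.cwd = "./ppo_stock"    # checkpoint directory; DRL_prediction later expects {cwd}/act.pth
args.break_step = int(2e5)  # stop once total_step exceeds break_step
train_agent(args)           # replaces train_and_evaluate(Arguments(...))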
@@ -49,29 +49,51 @@ def __init__(self, env, price_array, tech_array, turbulence_array):
self.turbulence_array = turbulence_array

def get_model(self, model_name, model_kwargs):
env_config = {
self.env_config = {
"price_array": self.price_array,
"tech_array": self.tech_array,
"turbulence_array": self.turbulence_array,
"if_train": True,
}
env = self.env(config=env_config)
self.model_kwargs = model_kwargs
self.gamma = model_kwargs.get("gamma", 0.985)

env = self.env
env.env_num = 1
if model_name not in MODELS:
    raise NotImplementedError("NotImplementedError")
agent = MODELS[model_name]
model = Arguments(agent_class=agent, env=env)

stock_dim = self.price_array.shape[1]
self.state_dim = 1 + 2 + 3 * stock_dim + self.tech_array.shape[1]
self.action_dim = stock_dim
self.env_args = {
"env_name": "StockEnv",
"config": self.env_config,
"state_dim": self.state_dim,
"action_dim": self.action_dim,
"if_discrete": False,
"max_step": self.price_array.shape[0] - 1,
}
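# Worked example (figures are illustrative, not from this file): with 30 tickers
# and 240 technical-indicator columns, state_dim = 1 + 2 + 3 * 30 + 240 = 333
# and action_dim = 30, i.e. one continuous action per ticker.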

model = Config(agent_class=agent, env_class=env, env_args=self.env_args)
model.if_off_policy = model_name in OFF_POLICY_MODELS
if model_kwargs is not None:
try:
model.learning_rate = model_kwargs["learning_rate"]
model.batch_size = model_kwargs["batch_size"]
model.gamma = model_kwargs["gamma"]
model.seed = model_kwargs["seed"]
model.net_dim = model_kwargs["net_dimension"]
model.target_step = model_kwargs["target_step"]
model.eval_gap = model_kwargs["eval_gap"]
model.eval_times = model_kwargs["eval_times"]
model.break_step = int(
2e5
) # break training if 'total_step > break_step'
model.net_dims = (
128,
64,
) # the middle layer dimension of MultiLayer Perceptron
model.gamma = self.gamma # discount factor of future rewards
model.horizon_len = model.max_step
model.repeat_times = 16 # repeatedly update network using ReplayBuffer to keep critic's loss small
model.learning_rate = model_kwargs.get("learning_rate", 1e-4)
model.state_value_tau = 0.1 # tau of the soft update that normalizes states and values
model.eval_times = model_kwargs.get("eval_times", 2**5)
model.eval_per_step = int(2e4)
except BaseException:
raise ValueError(
"Fail to read arguments, please check 'model_kwargs' input."
@@ -81,51 +103,66 @@ def get_model(self, model_name, model_kwargs):
def train_model(self, model, cwd, total_timesteps=5000):
model.cwd = cwd
model.break_step = total_timesteps
train_and_evaluate(model)
train_agent(model)
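The same pipeline through the wrapper class above, as a sketch; the `DRLAgent` class name is assumed from this module's public interface, and the environment class and arrays are reused from the earlier sketch.

from finrl.agents.elegantrl.models import DRLAgent  # assumed class name in this file

agent = DRLAgent(
    env=StockTradingEnv,  # the class itself; get_model builds env_config internally
    price_array=price_array,
    tech_array=tech_array,
    turbulence_array=turbulence_array,
)
model = agent.get_model(
    "ppo",
    model_kwargs={"learning_rate": 1e-4, "eval_times": 2**5},
)
agent.train_model(model=model, cwd="./trained_ppo", total_timesteps=int(2e5))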

@staticmethod
def DRL_prediction(model_name, cwd, net_dimension, environment):
if model_name not in MODELS:
raise NotImplementedError("NotImplementedError")
agent = MODELS[model_name]
environment.env_num = 1
args = Arguments(agent_class=agent, env=environment)
def DRL_prediction(model_name, cwd, net_dimension, environment, env_args):
import torch

gpu_id = 0 # >=0 means GPU ID, -1 means CPU
agent_class = MODELS[model_name]
stock_dim = env_args["price_array"].shape[1]
state_dim = 1 + 2 + 3 * stock_dim + env_args["tech_array"].shape[1]
action_dim = stock_dim
env_args = {
"env_num": 1,
"env_name": "StockEnv",
"state_dim": state_dim,
"action_dim": action_dim,
"if_discrete": False,
"max_step": env_args["price_array"].shape[0] - 1,
"config": env_args,
}

actor_path = f"{cwd}/act.pth"
net_dim = [2**7]

"""init"""
env = environment
env_class = env
args = Config(agent_class=agent_class, env_class=env_class, env_args=env_args)
args.cwd = cwd
args.net_dim = net_dimension
# load agent
try:
agent = init_agent(args, gpu_id=0)
act = agent.act
device = agent.device
except BaseException:
raise ValueError("Fail to load agent!")

# test on the testing env
_torch = torch
state = environment.reset()
act = agent_class(
net_dim, env.state_dim, env.action_dim, gpu_id=gpu_id, args=args
).act
parameters_dict = {}
act = torch.load(actor_path)
for name, param in act.named_parameters():
parameters_dict[name] = torch.tensor(param.detach().cpu().numpy())

act.load_state_dict(parameters_dict)

if_discrete = env.if_discrete
device = next(act.parameters()).device
state = env.reset()
episode_returns = [] # the cumulative_return / initial_account
episode_total_assets = [environment.initial_total_asset]
with _torch.no_grad():
for i in range(environment.max_step):
s_tensor = _torch.as_tensor((state,), device=device)
a_tensor = act(s_tensor) # action_tanh = act.forward()
action = (
a_tensor.detach().cpu().numpy()[0]
) # detach() is not needed because the loop runs under torch.no_grad()
state, reward, done, _ = environment.step(action)

total_asset = (
environment.amount
+ (
environment.price_ary[environment.day] * environment.stocks
).sum()
)
episode_total_assets.append(total_asset)
episode_return = total_asset / environment.initial_total_asset
episode_returns.append(episode_return)
if done:
break
episode_total_assets = [env.initial_total_asset]
max_step = env.max_step
for steps in range(max_step):
s_tensor = torch.as_tensor(
state, dtype=torch.float32, device=device
).unsqueeze(0)
a_tensor = act(s_tensor).argmax(dim=1) if if_discrete else act(s_tensor)
action = (
a_tensor.detach().cpu().numpy()[0]
) # detach() is not strictly needed when torch.no_grad() is used outside
state, reward, done, _ = env.step(action)
total_asset = env.amount + (env.price_ary[env.day] * env.stocks).sum()
episode_total_assets.append(total_asset)
episode_return = total_asset / env.initial_total_asset
episode_returns.append(episode_return)
if done:
break
print("Test Finished!")
# return episode total_assets on testing data
print("episode_return", episode_return)
print("episode_retuen", episode_return)
return episode_total_assets
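A matching prediction sketch for the updated signature: `environment` is an already-constructed test environment instance, while `env_args` is the raw data dict whose `price_array` and `tech_array` shapes are used to rebuild the state and action dimensions. Names are reused from the sketches above and remain illustrative assumptions.

env_config = {
    "price_array": price_array,
    "tech_array": tech_array,
    "turbulence_array": turbulence_array,
    "if_train": False,
}
test_env = StockTradingEnv(config=env_config)
account_values = DRLAgent.DRL_prediction(
    model_name="ppo",
    cwd="./trained_ppo",   # must contain act.pth from training
    net_dimension=2**7,
    environment=test_env,
    env_args=env_config,
)
print(account_values[-1] / account_values[0] - 1)  # return over the test window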
