Feat/proposal market #416

Draft: wants to merge 16 commits into develop
feat: RL agent
TomMcL committed May 4, 2023
commit 096c8eab9be61ce3316b1fc0c0401f96daf8468a
14 changes: 7 additions & 7 deletions vega_sim/reinforcement/v2/agents/puppets.py
@@ -110,26 +110,26 @@ def initialise(self, vega: VegaService, create_wallet: bool = True):
         self.market_id = self.vega.find_market_id(name=self.market_name)
 
     def step(self, vega_state: VegaState):
-        if (
-            self.action is not None
-            and self.action is not NoAction
-            and self.action.side != Side.NONE
-        ):
+        if self.action is not None and self.action is not NoAction:
             try:
                 self.vega.cancel_order(
                     trading_key=self.key_name, market_id=self.market_id
                 )
                 self.vega.submit_order(
                     trading_key=self.key_name,
                     market_id=self.market_id,
-                    side=("SIDE_BUY" if self.action.side == Side.BUY else "SIDE_SELL"),
+                    side=(
+                        "SIDE_BUY"
+                        if self.action.side == ForcedSide.BUY
+                        else "SIDE_SELL"
+                    ),
                     volume=self.action.volume,
                     time_in_force=vega_protos.vega.Order.TimeInForce.TIME_IN_FORCE_GTC,
                     order_type=vega_protos.vega.Order.Type.TYPE_LIMIT,
                     pegged_order=PeggedOrder(
                         reference=(
                             vega_protos.vega.PeggedReference.PEGGED_REFERENCE_BEST_BID
-                            if self.action.side == Side.BUY
+                            if self.action.side == ForcedSide.BUY
                             else vega_protos.vega.PeggedReference.PEGGED_REFERENCE_BEST_ASK
                         ),
                         offset=0,
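
The switch from `Side` to `ForcedSide` also drops the `side != Side.NONE` guard, which only works if the new enum has no neutral member. A minimal sketch of the side-mapping logic, assuming `ForcedSide` is a plain two-member enum and using a hypothetical `MarketOrderAction` stand-in for the fields the puppet reads:

```python
# Illustrative only: ForcedSide is assumed to be a two-member enum; the real
# action class lives in vega_sim.reinforcement.v2 and may differ.
from dataclasses import dataclass
from enum import Enum


class ForcedSide(Enum):
    BUY = 0
    SELL = 1


@dataclass
class MarketOrderAction:
    side: ForcedSide
    volume: float


def to_vega_side(action: MarketOrderAction) -> str:
    # With only BUY/SELL members there is no NONE case to guard against,
    # so the previous `side != Side.NONE` check becomes unnecessary.
    return "SIDE_BUY" if action.side == ForcedSide.BUY else "SIDE_SELL"


assert to_vega_side(MarketOrderAction(ForcedSide.BUY, 1.0)) == "SIDE_BUY"
```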
5 changes: 4 additions & 1 deletion vega_sim/reinforcement/v2/learning_environment.py
@@ -74,10 +74,13 @@ def step(self, actions: Dict[str, Optional[Action]]) -> Dict[str, StepResult]:
         for agent_name, reward_gen in self._agent_to_reward.items():
             step_res[agent_name] = StepResult(
                 observation=self._extract_observation(agent_name),
-                reward=reward_gen.get_reward(self._vega),
+                reward=self.calculate_reward(reward_gen),
             )
         return step_res
 
+    def calculate_reward(self, rewarder: Type[BaseRewarder]) -> float:
+        return rewarder.get_reward(vega=self._vega)
+
     def _reset_vega(self) -> None:
         self._vega.stop()
         self._vega = VegaServiceNull(
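
The new `calculate_reward` helper simply delegates to the rewarder, so anything exposing `get_reward(vega=...)` can be plugged in. A minimal sketch of that protocol, with an illustrative `ZeroReward` class (the real hierarchy lives in `vega_sim.reinforcement.v2.rewards`):

```python
# Sketch of the rewarder interface calculate_reward assumes; class names
# other than BaseRewarder are illustrative.
from abc import ABC, abstractmethod


class BaseRewarder(ABC):
    @abstractmethod
    def get_reward(self, vega) -> float:
        """Return the reward for the current step, reading state from vega."""


class ZeroReward(BaseRewarder):
    def get_reward(self, vega) -> float:
        return 0.0


def calculate_reward(rewarder: BaseRewarder, vega) -> float:
    # Mirrors Environment.calculate_reward: delegate to the rewarder and
    # pass the running vega service as a keyword argument.
    return rewarder.get_reward(vega=vega)
```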
23 changes: 19 additions & 4 deletions vega_sim/reinforcement/v2/stable_baselines/environment.py
@@ -2,7 +2,7 @@
 
 import gymnasium as gym
 from gymnasium import spaces
-from typing import Type
+from typing import Type, Optional
 from enum import Enum
 from stable_baselines3.common.callbacks import BaseCallback
 
@@ -13,7 +13,7 @@
     AGENT_TYPE_TO_ACTION,
 )
 from vega_sim.reinforcement.v2.learning_environment import Environment
-from vega_sim.reinforcement.v2.rewards import Reward
+from vega_sim.reinforcement.v2.rewards import Reward, REWARD_ENUM_TO_CLASS
 from vega_sim.reinforcement.v2.stable_baselines.states import (
     price_state_with_fees_obs_space,
     position_state_with_fees_obs_space,
@@ -53,6 +53,7 @@ def __init__(
         num_levels_state: int = 5,
         trade_volume: float = 1,
         steps_per_trading_session: int = 1000,
+        terminal_reward_type: Optional[Reward] = None,
     ):
         super().__init__()
         self.num_levels_state = num_levels_state
@@ -62,6 +63,7 @@ def __init__(
         self.steps_per_trading_session = steps_per_trading_session
         self.current_step = 0
         self.learner_name = "learner_1"
+        self.terminal_reward_type = terminal_reward_type
 
         # Define action and observation space
         # They must be gym.spaces objects
@@ -79,6 +81,7 @@ def __init__(
             initial_asset_mint=1e8,
             step_length_seconds=1,
             block_length_seconds=1,
+            # market_maker_assumed_market_kappa=0.8,
             buy_intensity=5,
             sell_intensity=5,
             market_name="ETH",
@@ -127,10 +130,22 @@ def step(self, action):
         )[self.learner_name]
         self.current_step += 1
 
+        is_terminal = self.current_step >= self.steps_per_trading_session
+
+        if is_terminal and self.terminal_reward_type is not None:
+            terminal_reward = REWARD_ENUM_TO_CLASS[self.terminal_reward_type](
+                agent_key=self.learner_name,
+                asset_id=self.env._asset_id,
+                market_id=self.env._market_id,
+            )
+            reward = step_res.reward + self.env.calculate_reward(terminal_reward)
+        else:
+            reward = step_res.reward
+
         return (
             step_res.observation.to_array(),
-            step_res.reward,
-            self.current_step >= self.steps_per_trading_session,
+            reward,
+            is_terminal,
             False,
             {},
         )
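
The change above shapes the reward only on the final step of a trading session: a terminal rewarder is built from `REWARD_ENUM_TO_CLASS` and its value is added to the per-step reward. A sketch of that logic in isolation, with stand-in types (the enum members and the rewarder mapping are illustrative; the real ones come from `vega_sim.reinforcement.v2.rewards`):

```python
from enum import Enum
from typing import Callable, Dict, Optional


class Reward(Enum):
    PNL = "pnl"
    SQ_INVENTORY_PENALTY = "sq_inventory_penalty"


def shaped_reward(
    step_reward: float,
    current_step: int,
    steps_per_session: int,
    terminal_reward_type: Optional[Reward],
    terminal_reward_fn: Dict[Reward, Callable[[], float]],
) -> float:
    is_terminal = current_step >= steps_per_session
    if is_terminal and terminal_reward_type is not None:
        # Only the last step of the session carries the extra term,
        # e.g. a squared-inventory penalty on the final position.
        return step_reward + terminal_reward_fn[terminal_reward_type]()
    return step_reward
```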
23 changes: 18 additions & 5 deletions vega_sim/reinforcement/v2/stable_baselines/run.py
@@ -7,15 +7,28 @@
 e = env.SingleAgentVegaEnv(
     action_type=env.ActionType.AT_TOUCH_ONE_SIDE,
     steps_per_trading_session=200,
-    reward_type=env.Reward.SQ_INVENTORY_PENALTY,
-    state_type=PositionOnly,
+    reward_type=env.Reward.PNL,
+    terminal_reward_type=env.Reward.SQ_INVENTORY_PENALTY,
+    state_type=PriceStateWithFees,
 )
-model = PPO(
+model = DQN(
     "MlpPolicy",
     e,
     verbose=1,
     tensorboard_log="./ppo_tensorboard/",
-    # target_update_interval=200,
-    n_steps=200,
+    target_update_interval=200,
+    learning_starts=1000,
+    train_freq=100,
+    buffer_size=10_000,
+    learning_rate=0.01,
     batch_size=100,
 ).learn(total_timesteps=1_000_000)
+# model = PPO(
+#     "MlpPolicy",
+#     e,
+#     verbose=1,
+#     tensorboard_log="./ppo_tensorboard/",
+#     # target_update_interval=200,
+#     n_steps=600,
+#     batch_size=100,
+# ).learn(total_timesteps=1_000_000)
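
A possible follow-up to this script, not part of the commit: once `.learn()` returns, the trained policy can be saved and rolled out with the standard stable_baselines3 API. This sketch reuses the `model` and `e` objects from the script above; the save path and the assumption that `e.reset()` follows the gymnasium `(obs, info)` convention are illustrative.

```python
model.save("dqn_at_touch_one_side")  # illustrative path

obs, _ = e.reset()
terminated = truncated = False
total_reward = 0.0
while not (terminated or truncated):
    # Greedy rollout of the learned policy for one trading session.
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, _ = e.step(action)
    total_reward += reward
print(f"Episode reward: {total_reward}")
```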
13 changes: 9 additions & 4 deletions vega_sim/scenario/configurable_market/scenario.py
@@ -39,10 +39,10 @@
 class ConfigurableMarket(Scenario):
     def __init__(
         self,
-        market_name: str = None,
-        market_code: str = None,
-        asset_name: str = None,
-        asset_dp: str = None,
+        market_name: Optional[str] = None,
+        market_code: Optional[str] = None,
+        asset_name: Optional[str] = None,
+        asset_dp: Optional[str] = None,
         num_steps: int = 120,
         granularity: Optional[Granularity] = Granularity.MINUTE,
         block_size: int = 1,
@@ -52,6 +52,7 @@ def __init__(
         ] = None,
         settle_at_end: bool = True,
         price_process_fn: Optional[Callable] = None,
+        pause_every_n_steps: Optional[int] = None,
     ):
         super().__init__(state_extraction_fn=state_extraction_fn)
 
@@ -62,6 +63,7 @@ def __init__(
         self.block_length_seconds = block_length_seconds
         self.settle_at_end = settle_at_end
         self.price_process_fn = price_process_fn
+        self.pause_every_n_steps = pause_every_n_steps
 
         # Asset parameters
         self.asset_name = asset_name
@@ -88,6 +90,7 @@ def _generate_price_process(
             granularity=self.granularity,
             start=str(start),
             end=str(end),
+            interpolation=f"{self.granularity.value}s",
         )
 
         return list(price_process)
@@ -98,6 +101,7 @@ def configure_agents(
         tag: str,
         market_config: Optional[MarketConfig] = None,
         random_state: Optional[np.random.RandomState] = None,
+        **kwargs,
     ) -> Dict[str, StateAgent]:
         market_config = market_config if market_config is not None else MarketConfig()
 
@@ -247,4 +251,5 @@ def configure_environment(
             transactions_per_block=self.block_size,
             vega_service=vega,
             block_length_seconds=self.block_length_seconds,
+            pause_every_n_steps=self.pause_every_n_steps,
         )
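
The new `pause_every_n_steps` option is threaded from the scenario constructor through to the environment. A hypothetical usage sketch, assuming `ConfigurableMarket` and `Granularity` are imported from their existing locations in `vega_sim` (argument values are illustrative only):

```python
# Pause the simulation every 60 steps, e.g. for manual inspection of state.
scenario = ConfigurableMarket(
    market_name="ETH/USD",
    asset_name="USD",
    num_steps=240,
    granularity=Granularity.MINUTE,
    pause_every_n_steps=60,
)
```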