
Commit

env runs fine now, finally!!
abelsalm committed May 12, 2024
1 parent 5f8d01b commit 533b270
Showing 8 changed files with 38 additions and 40 deletions.
Binary file modified __pycache__/agent.cpython-312.pyc
Binary file modified __pycache__/deepql.cpython-312.pyc
Binary file modified __pycache__/env.cpython-312.pyc
Binary file modified __pycache__/equations.cpython-312.pyc
8 changes: 5 additions & 3 deletions agent.py
@@ -51,7 +51,7 @@ def store_transition(self, state, action, reward, state_, done):
# here we use the epsilon-greedy strategy
def choose_action(self, observation):
if np.random.random() > self.epsilon:
- state = T.tensor([observation]).to(self.Q_eval.device)
+ state = T.tensor([observation]).to(self.Q_eval.device).float()
actions = self.Q_eval.forward(state)
action = T.argmax(actions).item()

@@ -97,7 +97,9 @@ def learn(self):
self.epsilon = self.epsilon_min


- class ChaserAgent():
+
+ # actor critic strategy, later...
+ '''class ChaserAgent():
def __init__(self, gamma, epsilon, lr, dim_input, batch_size, dim_action,
memory_max=10000, epsilon_min=0.01, epsilon_down=5e-4):
# all of the hyperparameters
@@ -187,4 +189,4 @@ def learn(self):
if self.epsilon > self.epsilon_min:
self.epsilon -= self.epsilon_down
else:
- self.epsilon = self.epsilon_min
+ self.epsilon = self.epsilon_min'''
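
For context, a minimal standalone sketch (not part of the commit) of why the added .float() cast in choose_action matters: NumPy observations default to float64, while the Q-network's torch.nn.Linear weights default to float32, so forwarding the tensor without the cast raises a dtype mismatch. The net and obs names below are placeholders, not the repository's.

# Illustrative only: reproduces the dtype issue the .float() cast avoids
import numpy as np
import torch as T
import torch.nn as nn

net = nn.Linear(13, 8)            # stand-in for Q_eval (13 states -> 8 discrete actions)
obs = np.zeros(13)                # NumPy defaults to float64
state = T.tensor([obs]).float()   # cast to float32 so it matches the layer weights
q_values = net(state)             # without .float(), this forward pass errors out
action = T.argmax(q_values).item()
print(action)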
36 changes: 25 additions & 11 deletions env.py
@@ -7,8 +7,17 @@
class Env():
def __init__(self):
# 4 actions: [Fx, Fy, Fz, L]
- self.action_space = spaces.Box(low=-0.3, high=0.3, shape=(4,), dtype=np.float32)
+ self.conbtinuous_action_space = spaces.Box(low=-0.3, high=0.3, shape=(4,), dtype=np.float32)
# 13 states: [x, y, z, vx, vy, vz, qx, qy, qz, qw, wx, wy, wz]
+ self.action_space = []
+ for i in range(4):
+ for j in range(2):
+ a = np.zeros(4)
+ if i != 3:
+ a[i] = 0.3
+ a[3] = 0.3*j
+ self.action_space.append(a)
+
self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(13,), dtype=np.float32)
# number of timesteps in one episode
self.episode_length = equations.num_steps
@@ -18,6 +27,11 @@ def __init__(self):
def reset(self):
# reset the state to the initial state
self.state = np.zeros(13)
+ self.state[1] = 10000 # initial y position
+ self.state[6] = 1 # initial quaternion
+ self.state[10] = 10**(-5) # initial angular velocity
+ self.state[11] = 10**(-5)
+ self.state[12] = 10**(-5)
return self.state

def step(self, action):
@@ -38,11 +52,11 @@ def reward(self, state, action):
r = 0
# on y axis
if self.state[1] > 25:
- r += (10000 - self.state[1])/10000 # for y, needs to come close but not too much
+ r += (10000 - self.state[1])/1000000 # for y, needs to come close but not too much
elif self.state[1] < 25 and self.state[1] > 10:
- r += 50
+ r += 10
else:
- r -= 50
+ r -= 10

# stay on the orbit for x and z axis
if self.state[0] > 50:
@@ -52,26 +66,26 @@

# globally, distance to the satellite
if np.linalg.norm(self.state[:3]) > 15 and np.linalg.norm(self.state[:3]) < 50:
- r += 50
+ r += 10
elif np.linalg.norm(self.state[:3]) < 15:
- r -= 50
+ r -= 10

# then economy of propellant
r -= np.linalg.norm(action[:3])

- # orientation constraint
+ '''# orientation constraint
q_norm = self.state[6:10] / np.linalg.norm(self.state[6:10])
d = q_norm[1:]
dot_product = np.dot(d, self.state[:3])
norm_r = np.linalg.norm(self.state[:3])
constraint = -dot_product / norm_r
constraint -= np.cos(np.deg2rad(20))
if constraint < 0:
- r -= 50
+ r -= 50'''

return r

env = Env()
print(env.reset())
- for i in range(10):
- print(env.step([0.1, 0, 0, 0]))
+ for i in range(100):
+ print(env.step([0., -0.2, 0., 0.]))
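
As a side note (not in the commit itself), the nested loop added to Env.__init__ enumerates 8 discrete [Fx, Fy, Fz, L] actions, which is what the actions=8 argument passed to Agent in test_dql_on_spacecraft.py refers to. A standalone sketch of the resulting action set:

# Illustrative only: mirrors the action_space construction added in Env.__init__
import numpy as np

action_space = []
for i in range(4):
    for j in range(2):
        a = np.zeros(4)
        if i != 3:
            a[i] = 0.3        # +0.3 thrust along axis i; i == 3 means no thrust
        a[3] = 0.3 * j        # last component (L) toggled between 0 and 0.3
        action_space.append(a)

for k, a in enumerate(action_space):
    print(k, a)
# prints 8 vectors: thrust on x, y, z or none, each with L = 0 or 0.3,
# e.g. index 0 -> [0.3 0. 0. 0.] and index 7 -> [0. 0. 0. 0.3]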
32 changes: 7 additions & 25 deletions test_dql_on_spacecraft.py
@@ -1,51 +1,33 @@
from env import Env
- from agent import ChaserAgent
+ from agent import Agent
import numpy as np

# flake8: noqa

env = Env()
- agent = ChaserAgent(gamma=0.99, epsilon=1.0, lr=0.003, dim_input=13, batch_size=64, dim_action=4,
- memory_max=10000, epsilon_min=0.03, epsilon_down=2e-5)
+ agent = Agent(gamma=0.99, epsilon=1.0, lr=0.003, dim_input=13, batch_size=64, actions=8,
+ memory_max=10000, epsilon_min=0.05, epsilon_down=2e-3)
scores = []
eps_history = []
- n_games = 500
+ n_games = 50

for i in range(n_games):
done = False
score = 0
observation = env.reset()[0]
while not done:
action = agent.choose_action(observation)
- observation_, reward, done = env.step(action)
+ observation_, reward, done = env.step(env.action_space[action])
score += reward
agent.store_transition(observation, action, reward, observation_, done)
agent.learn()
observation = observation_
agent.step += 1
scores.append(score)
eps_history.append(agent.epsilon)

avg_score = np.mean(scores[-20:])
- print('episode', i, 'score %.2f' % score, 'average score %.2f' % avg_score, 'epsilon %.2f' % agent.epsilon)
+ if (i + 1) % 25 == 0:
+ print('episode', i, 'score %.2f' % score, 'average score %.2f' % avg_score, 'epsilon %.2f' % agent.epsilon)
+ print(env.state)


- # test the rl algo and display the results of the agent
- env = Env()
- avg_score = 0
- for i in range(100):
- state = env.reset()
- done = False
- score = 0
- observation = env.reset()[0]
- while not done:
- action = agent.choose_action(observation)
- observation_, reward, done = env.step(action)
- observation = observation_
- score += reward
- if score > 200:
- avg_score += 1
- print('Episode:{} Score:{}'.format(i, score))
- print('Average score:', avg_score/100)
- env.close()
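
One implication of the new hyperparameters worth noting (an observation, not something in the commit): since learn() in agent.py decrements epsilon by epsilon_down on each call until epsilon_min is reached, the switch to epsilon_min=0.05 and epsilon_down=2e-3 means exploration anneals to its floor after roughly 475 learning steps.

# Back-of-the-envelope check, assuming one learn() call per environment step
epsilon_start, epsilon_min, epsilon_down = 1.0, 0.05, 2e-3
steps_to_floor = (epsilon_start - epsilon_min) / epsilon_down
print(round(steps_to_floor))  # ~475 steps before epsilon stays at 0.05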
2 changes: 1 addition & 1 deletion test_with_lunlan.py
@@ -10,7 +10,7 @@
memory_max=10000, epsilon_min=0.03, epsilon_down=2e-5)
scores = []
eps_history = []
- n_games = 500
+ n_games = 50

for i in range(n_games):
done = False
