Commit

not great, this one
abelsalm committed May 14, 2024
1 parent f7ad6ef commit 2295bb1
Showing 11 changed files with 233 additions and 59 deletions.
Binary file modified __pycache__/agent.cpython-312.pyc
Binary file modified __pycache__/env.cpython-312.pyc
Binary file modified __pycache__/equations.cpython-312.pyc
Binary file modified __pycache__/visualize.cpython-312.pyc
4 changes: 2 additions & 2 deletions agent.py
@@ -2,7 +2,7 @@
# first agent that works with deepqn, we will change methods later
# import: basically PyTorch libraries and NumPy
import torch as T
from deepql import Actor, Critic, DeepQN
from deepql import DeepQN

import numpy as np

@@ -51,7 +51,7 @@ def store_transition(self, state, action, reward, state_, done):
# here we use the epsilon-greedy strategy
def choose_action(self, observation):
if np.random.random() > self.epsilon:
state = T.tensor([observation]).to(self.Q_eval.device).float()
state = T.tensor(np.array(observation), dtype=T.float32).to(self.Q_eval.device)
actions = self.Q_eval.forward(state)
action = T.argmax(actions).item()

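
For context, a minimal sketch of the epsilon-greedy selection this hunk touches, assuming the Agent keeps its Q-network in Q_eval and exposes its number of discrete actions as n_actions (that attribute name is an assumption, not taken from this diff):

import numpy as np
import torch as T

def choose_action(agent, observation):
    # Exploit with probability 1 - epsilon, otherwise explore at random.
    if np.random.random() > agent.epsilon:
        # np.array(observation) avoids the slow list-of-ndarray conversion,
        # and dtype=T.float32 matches the network's weights.
        state = T.tensor(np.array(observation), dtype=T.float32).to(agent.Q_eval.device)
        actions = agent.Q_eval.forward(state)
        return T.argmax(actions).item()
    return np.random.randint(agent.n_actions)  # hypothetical attribute
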
73 changes: 33 additions & 40 deletions env.py
@@ -3,6 +3,7 @@
import equations
import visualize as vis


# flake8: noqa

class Env():
@@ -12,13 +13,11 @@ def __init__(self):
# 13 states: [x, y, z, vx, vy, vz, qx, qy, qz, qw, wx, wy, wz]
self.action_space = []
for i in range(4):
for j in range(2):
for k in range(2):
a = np.zeros(4)
if i != 3:
a[i] = 0.01*((-1)**k)
a[3] = 0.01*j
self.action_space.append(a)
for k in range(2):
a = np.zeros(4)
if i != 3:
a[i] = 0.02*((-1)**k)
self.action_space.append(a)

self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(13,), dtype=np.float32)
# number of timesteps in one episode
@@ -43,7 +42,8 @@ def step(self, action):
F = np.array([Fx, Fy, Fz])
L = np.array([0, 0, L])
r, v = equations.CW_finite_diff(r, v, equations.n0, equations.dt, F)
q, w = equations.q_and_w(q, w, L, equations.dt)
'''q, w = equations.q_and_w(q, w, L, equations.dt)'''
q, w = [1, 0, 0, 0], [0, 0, 0]
self.state = np.concatenate((r, v, q, w))
self.episode_length -= 1
done = self.episode_length == 0
@@ -57,26 +57,33 @@ def reward(self, state, action):
r = 0
# on y axis
if self.state[1] > 25:
r += abs((10000 - self.state[1]))/50000 # for y, needs to come close but not too much
elif self.state[1] < 25 and self.state[1] > 10:
r += 10000
else:
r -= 10000
r -= abs(self.state[1])/5000 # for y, needs to come close but not too much
elif self.state[1] < 1000 and self.state[1] > 10:
r = 1
if self.state[1] < 500:
r += 2
if self.state[1] < 100:
r += 3

# stay on the orbit for x and z axis
if self.state[0] > 50:
r -= (self.state[0] - 50)/100
elif self.state[2] > 50:
r -= (self.state[2] - 50)/100
if self.state[0] > 200:
r -= (self.state[0] - 100)/10
elif self.state[2] > 200:
r -= (self.state[2] - 100)/10

if np.linalg.norm(action[3:6]) > 0.3:
r -= 100

reward = r

# globally, distance to the satellite
if np.linalg.norm(self.state[:3]) > 15 and np.linalg.norm(self.state[:3]) < 50:
r += 1000
elif np.linalg.norm(self.state[:3]) < 15:
r -= 1000
'''if np.linalg.norm(self.state[:3]) > 15 and np.linalg.norm(self.state[:3]) < 50:
r += 1000'''
'''if np.linalg.norm(self.state[:3]) < 15:
r -= 1000'''

# then economy of propellant
r -= np.linalg.norm(action[:3])
'''# then economy of propellant
r -= np.linalg.norm(action[:3])'''

'''# orientation constraint
q_norm = self.state[6:10] / np.linalg.norm(self.state[6:10])
@@ -88,32 +95,18 @@ def reward(self, state, action):
if constraint < 0:
r -= 50'''

return r
return reward

# test the environment and visualize the results
env = Env()
print(env.reset())
indices = [[0, 10000, 0]]
for i in range(70):
if i%20 >= 10:
a = 1
else:
a = -1
state, r, ok = env.step([0.1, -0.2, 0.01, 0.1])
indices.append([state[0], state[1], state[2]])
for i in range(30):
if i%20 >= 10:
a = 1
else:
a = -1
state, r, ok = env.step([0, 0.1, 0., -0.1])
indices.append([state[0], state[1], state[2]])
indices = [[0, 100, 0]]
for i in range(100):
if i%20 >= 10:
a = 1
else:
a = -1
state, r, ok = env.step([-0.2, 0.4, -0.1, 0.])
state, r, ok = env.step([0.1, -0.2, 0.01, 0.])
indices.append([state[0], state[1], state[2]])

vis.visualize(indices)
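
To make the new discrete action set explicit, the snippet below rebuilds the table exactly as the updated __init__ loop does: ±0.02 thrust along each of x, y, z, plus two identical all-zero "coast" entries from the i == 3 pass, for 8 actions in total (matching actions=8 passed to the Agent in test_dql_on_spacecraft.py). This is just a reading of the committed loop, not new behaviour:

import numpy as np

action_space = []
for i in range(4):
    for k in range(2):
        a = np.zeros(4)                  # [Fx, Fy, Fz, 4th component], 4th left at 0 in this commit
        if i != 3:
            a[i] = 0.02 * ((-1) ** k)    # +0.02 then -0.02 on axis i
        action_space.append(a)

for idx, a in enumerate(action_space):
    print(idx, a)                        # indices 6 and 7 are duplicate zero actions
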
6 changes: 3 additions & 3 deletions equations.py
@@ -3,10 +3,10 @@
# flake8: noqa

# constants
n0 = 2 * np.pi / (24 * 60 * 60) # Earth's angular velocity (rad/s)
n0 = 1 #2 * np.pi / (24 * 60 * 60) # Earth's angular velocity (rad/s)
m = 3000 # mass (kg)
dt = 5 * 60 # dt in seconds
total_time = 2 * 24 * 60 * 60 # total simulated time in seconds
dt = 20 * 60 # dt in seconds
total_time = 4 * 24 * 60 * 60 # total simulated time in seconds
num_steps = total_time//dt # iterations
R = 1.5 # radius of the chaser
H = 8 # height of the chaser
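
A quick check of what the new constants imply, using the same integer division as equations.py's num_steps:

dt = 20 * 60                   # 1200 s per step
total_time = 4 * 24 * 60 * 60  # 345600 s, i.e. 4 days
num_steps = total_time // dt   # 288 steps
print(num_steps)               # 288

If num_steps is what env.py uses as the episode length, each episode now runs 288 steps of 20 simulated minutes each.
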
31 changes: 26 additions & 5 deletions test_dql_on_spacecraft.py
@@ -2,36 +2,57 @@
from agent import Agent
import numpy as np
import visualize as vis
import equations as eq
import time

# flake8: noqa

env = Env()
agent = Agent(gamma=0.99, epsilon=1.0, lr=0.003, dim_input=13, batch_size=64, actions=16,
agent = Agent(gamma=0.98, epsilon=1.0, lr=0.003, dim_input=13, batch_size=64, actions=8,
memory_max=10000, epsilon_min=0.08, epsilon_down=2e-4)
scores = []
eps_history = []
n_games = 50


indices_dernier = []
for i in range(n_games):
done = False
score = 0
observation = env.reset()
indices_derniers = []
while not done:
action = agent.choose_action(observation)
observation_, reward, done = env.step(env.action_space[action])
score += reward
agent.store_transition(observation, action, reward, observation_, done)
agent.learn()
observation = observation_
if i >= n_games-1:
indices_10_derniers = indices_dernier.append(env.state[0:3])
'''if i >= n_games -10:'''
indices_derniers.append(env.state[0:3])
scores.append(score)
eps_history.append(agent.epsilon)

vis.visualize(indices_derniers)

avg_score = np.mean(scores[-10:])
print('episode', i, 'score %.2f' % score, 'average score %.2f' % avg_score, 'epsilon %.2f' % agent.epsilon)
print(env.state)
time.sleep(2)

vis.visualize(indices_dernier)
'''for i in range(10):
vis.visualize(indices_derniers[i*eq.num_steps:(i+1)*eq.num_steps])'''

# test the rl algo and display the results of the agent
env = Env()
for i in range(1, 6):
state = env.reset()
done = False
score = 0
observation = env.reset()[0:6]
while not done:
action = agent.choose_action(observation)
observation_, reward, done = env.step(action)
observation = observation_
score += reward

print('Episode:{} Score:{}'.format(i, score))
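
One way to collect and plot only the final episode's trajectory with the existing helpers, assuming vis.visualize accepts a list of [x, y, z] points as in the env.py self-test (a sketch under those assumptions, not the committed script):

from env import Env
from agent import Agent
import visualize as vis

env = Env()
agent = Agent(gamma=0.98, epsilon=1.0, lr=0.003, dim_input=13, batch_size=64, actions=8,
              memory_max=10000, epsilon_min=0.08, epsilon_down=2e-4)
n_games = 50

last_trajectory = []                      # positions from the final episode only
for i in range(n_games):
    done = False
    observation = env.reset()
    while not done:
        action = agent.choose_action(observation)
        observation_, reward, done = env.step(env.action_space[action])
        agent.store_transition(observation, action, reward, observation_, done)
        agent.learn()
        observation = observation_
        if i == n_games - 1:
            last_trajectory.append(env.state[0:3])

vis.visualize(last_trajectory)            # one plot, after training has finished
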
6 changes: 3 additions & 3 deletions test_with_lunlan.py
@@ -7,10 +7,10 @@

env = gym.make('LunarLander-v2')
agent = Agent(gamma=0.99, epsilon=1.0, lr=0.003, dim_input=8, batch_size=64, actions=4,
memory_max=10000, epsilon_min=0.03, epsilon_down=2e-5)
memory_max=10000, epsilon_min=0.03, epsilon_down=4e-5)
scores = []
eps_history = []
n_games = 50
n_games = 500

for i in range(n_games):
done = False
@@ -42,7 +42,7 @@
observation = env.reset()[0]
while not done:
action = agent.choose_action(observation)
observation_, reward, done, info1, info2 = env.step(action)
observation_, reward, done = env.step(action)
observation = observation_
score += reward
if score > 200: