
now the objective is to improve the env's compilation time, and no more nan values
abelsalm committed May 12, 2024
1 parent a6ded7e commit 6632437
Showing 8 changed files with 40 additions and 32 deletions.
Binary file modified __pycache__/agent.cpython-312.pyc
Binary file not shown.
Binary file modified __pycache__/env.cpython-312.pyc
Binary file not shown.
Binary file modified __pycache__/equations.cpython-312.pyc
Binary file not shown.
Binary file modified __pycache__/visualize.cpython-312.pyc
Binary file not shown.
30 changes: 17 additions & 13 deletions env.py
@@ -13,11 +13,12 @@ def __init__(self):
self.action_space = []
for i in range(4):
for j in range(2):
a = np.zeros(4)
if i != 3:
a[i] = 0.3
a[3] = 0.3*j
self.action_space.append(a)
for k in range(2):
a = np.zeros(4)
if i != 3:
a[i] = 0.01*((-1)**k)
a[3] = 0.01*j
self.action_space.append(a)

self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(13,), dtype=np.float32)
# number of timesteps in one episode
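
As read from the new inner loops above, the discrete action set grows from 8 to 16 entries: along each translation axis the thrust can be +0.01 or -0.01 (or zero when i == 3), each paired with a fourth component of 0 or 0.01. A minimal standalone sketch of that enumeration, assuming only numpy (my reading of the diff, not code from the commit):

import numpy as np

# Reconstruction of the new action set for illustration: 4 axis choices
# x 2 values of the 4th component x 2 signs = 16 four-component actions.
action_space = []
for i in range(4):           # i == 3 means no translation thrust
    for j in range(2):       # 4th component: 0 or 0.01
        for k in range(2):   # sign of the translation thrust
            a = np.zeros(4)
            if i != 3:
                a[i] = 0.01 * ((-1) ** k)
            a[3] = 0.01 * j
            action_space.append(a)
print(len(action_space))     # 16, matching actions=16 passed to the Agent below
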
@@ -33,6 +34,7 @@ def reset(self):
self.state[10] = 10**(-5) # initial angular velocity
self.state[11] = 10**(-5)
self.state[12] = 10**(-5)
self.episode_length = equations.num_steps
return self.state

def step(self, action):
@@ -46,30 +48,32 @@ def step(self, action):
self.episode_length -= 1
done = self.episode_length == 0
reward = self.reward(self.state, action)
if self.episode_length == 0:
done = True
return self.state, reward, done

def reward(self, state, action):
# reward function
r = 0
# on y axis
if self.state[1] > 25:
r += (10000 - self.state[1])/1000000 # for y, needs to come close but not too much
r += abs((10000 - self.state[1]))/50000 # for y, needs to come close but not too much
elif self.state[1] < 25 and self.state[1] > 10:
r += 10
r += 10000
else:
r -= 10
r -= 10000

# stay on the orbit for x and z axis
if self.state[0] > 50:
r -= (self.state[0] - 50)/10
r -= (self.state[0] - 50)/100
elif self.state[2] > 50:
r -= (self.state[2] - 50)/10
r -= (self.state[2] - 50)/100

# globally, distance to the satellite
if np.linalg.norm(self.state[:3]) > 15 and np.linalg.norm(self.state[:3]) < 50:
r += 10
r += 1000
elif np.linalg.norm(self.state[:3]) < 15:
r -= 10
r -= 1000

# then economy of propellant
r -= np.linalg.norm(action[:3])
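
To make the rescaled reward terms concrete, here is a small standalone check of the new constants for one hypothetical state (my own illustration, assuming a state layout of [x, y, z, ...] in metres; not code from the commit):

import numpy as np

state = np.array([30.0, 5000.0, 20.0])        # hypothetical chaser position
action = np.array([0.01, 0.0, 0.0, 0.0])      # one of the 16 discrete actions

r = 0.0
if state[1] > 25:
    r += abs(10000 - state[1]) / 50000        # approach term on y: 0.1 here
elif 10 < state[1] < 25:
    r += 10000                                # inside the target band on y
else:
    r -= 10000                                # overshot the target band

if state[0] > 50:
    r -= (state[0] - 50) / 100                # drift penalty on x
elif state[2] > 50:
    r -= (state[2] - 50) / 100                # drift penalty on z

d = np.linalg.norm(state[:3])
if 15 < d < 50:
    r += 1000                                 # near the satellite, but not inside 15 m
elif d < 15:
    r -= 1000                                 # too close

r -= np.linalg.norm(action[:3])               # propellant economy term
print(r)                                      # ~0.09 for this state
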
@@ -90,7 +94,7 @@ def reward(self, state, action):
'''env = Env()
print(env.reset())
indices = [[0, 10000, 0]]
for i in range(200):
for i in range(100):
if i%20 >= 10:
a = 1
else:
2 changes: 1 addition & 1 deletion equations.py
@@ -6,7 +6,7 @@
n0 = 2 * np.pi / (24 * 60 * 60) # Earth's angular velocity (rad/s)
m = 3000 # mass (kg)
dt = 5 * 60 # dt in seconds
total_time = 7 * 24 * 60 * 60 # total time of one week in seconds
total_time = 2 * 24 * 60 * 60 # total time of two days in seconds
num_steps = total_time//dt # iterations
R = 1.5 # radius of the chaser
H = 8 # height of the chaser
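
With dt unchanged at 5 * 60 s, shortening total_time from one week to two days cuts the number of simulation steps per episode from 2016 to 576; a quick check (my own arithmetic, not part of the commit):

dt = 5 * 60                               # 300 s per step
old_total = 7 * 24 * 60 * 60              # one week  = 604800 s
new_total = 2 * 24 * 60 * 60              # two days  = 172800 s
print(old_total // dt, new_total // dt)   # 2016 -> 576 steps per episode
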
16 changes: 10 additions & 6 deletions test_dql_on_spacecraft.py
@@ -1,33 +1,37 @@
from env import Env
from agent import Agent
import numpy as np
import visualize as vis

# flake8: noqa

env = Env()
agent = Agent(gamma=0.99, epsilon=1.0, lr=0.003, dim_input=13, batch_size=64, actions=8,
memory_max=10000, epsilon_min=0.05, epsilon_down=2e-3)
agent = Agent(gamma=0.99, epsilon=1.0, lr=0.003, dim_input=13, batch_size=64, actions=16,
memory_max=10000, epsilon_min=0.08, epsilon_down=2e-4)
scores = []
eps_history = []
n_games = 50


indices_dernier = []
for i in range(n_games):
done = False
score = 0
observation = env.reset()[0]
observation = env.reset()
while not done:
action = agent.choose_action(observation)
observation_, reward, done = env.step(env.action_space[action])
score += reward
agent.store_transition(observation, action, reward, observation_, done)
agent.learn()
observation = observation_
agent.step += 1
if i >= n_games-1:
indices_dernier.append(env.state[0:3])  # record the chaser's position along the final episode
scores.append(score)
eps_history.append(agent.epsilon)

avg_score = np.mean(scores[-20:])
avg_score = np.mean(scores[-10:])
print('episode', i, 'score %.2f' % score, 'average score %.2f' % avg_score, 'epsilon %.2f' % agent.epsilon)
print(env.state)


vis.visualize(indices_dernier)
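
A rough look at the new exploration schedule, under the assumption that the Agent lowers epsilon by epsilon_down once per learning step (the Agent code is not part of this diff, so treat this as a sketch only):

epsilon, epsilon_min, epsilon_down = 1.0, 0.08, 2e-4
steps_to_floor = (epsilon - epsilon_min) / epsilon_down
print(steps_to_floor)         # 4600 learning steps until epsilon_min is reached
print(steps_to_floor / 576)   # ~8 episodes of 576 steps; the old 2e-3 decay hit its floor within the first episode
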
24 changes: 12 additions & 12 deletions visualize.py
@@ -9,8 +9,6 @@
# Window size definition
screen_width = 800
screen_height = 600
screen = pygame.display.set_mode((screen_width, screen_height))
pygame.display.set_caption("Vaisseau suivant une droite")

# Colors
BROWN =(165, 80, 80)
@@ -34,26 +32,28 @@
point_x = 650
point_y = 300

def draw_circle():
def draw_circle(screen):
pygame.draw.circle(screen, BROWN, (650, point_y), 15)

def draw_earth():
def draw_earth(screen):
pygame.draw.rect(screen, EARTH_BLUE, (0, 0, screen_width, 30))
pygame.draw.rect(screen, EARTH_GREEN, (0, 0, screen_width, 15))

# Function to draw the line
def draw_line():
def draw_line(screen):
pygame.draw.line(screen, WHITE, (0, point_y), (screen_width, point_y))

# Function to update the spacecraft's position
def update_vaisseau(x, y):
def update_vaisseau(screen, x, y):
vaisseau_rect.center = (point_x - y, point_y + x)

def dot(x, y, z):
def dot(screen, x, y, z):
pygame.draw.circle(screen, (255, -min(0, max(-255, z*255/1000)), max(0, min(255, z*255/1000))), (point_x - y, point_y + x), 2)

# Main loop
def visualize(indices):
screen = pygame.display.set_mode((screen_width, screen_height))
pygame.display.set_caption("Chaser trajectory visualization")

running = True
cruise = True
@@ -68,19 +68,19 @@ def visualize(indices):
screen.fill(BLACK)

# Draw the line
draw_earth()
draw_line()
draw_circle()
draw_earth(screen)
draw_line(screen)
draw_circle(screen)

# Update the spacecraft's position
update_vaisseau(indices[i][0]/1000, indices[i][1]*6/100) # put your x and y coordinates here
update_vaisseau(screen, indices[i][0]/1000, indices[i][1]*6/100) # put your x and y coordinates here

# Display the spacecraft
screen.blit(vaisseau_image, vaisseau_rect)

if i > 0:
for j in range(i-1):
dot(indices[j][0]/1000, indices[j][1]*6/100, indices[j][2])
dot(screen, indices[j][0]/1000, indices[j][1]*6/100, indices[j][2])

# Refresh the display
pygame.display.flip()
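
Since pygame.display.set_mode now lives inside visualize() rather than at module level, importing visualize no longer appears to open a window during training. A hypothetical usage sketch mirroring the call at the end of test_dql_on_spacecraft.py (the trajectory values below are made up):

import visualize as vis

# Fabricated [x, y, z] trajectory, same shape as indices_dernier above.
trajectory = [[0.0, 10000.0 - 50.0 * t, 0.0] for t in range(100)]
vis.visualize(trajectory)   # opens the pygame window and plays back the points
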
