env.py
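"""Gymnasium-style environment for a spacecraft relative-motion (rendezvous) scenario.

Relative translational motion is propagated with a Clohessy-Wiltshire
finite-difference model (equations.CW_finite_diff); attitude propagation is
currently disabled in step(). The 13-dimensional state is
[x, y, z, vx, vy, vz, qx, qy, qz, qw, wx, wy, wz] and an action is
[Fx, Fy, Fz, L], where L is used as a torque about the z axis.
"""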
import numpy as np
from gymnasium import spaces
import equations
import visualize as vis
# flake8: noqa
class Env():
    def __init__(self):
        # 4-dimensional continuous action: [Fx, Fy, Fz, L]
        self.continuous_action_space = spaces.Box(low=-0.3, high=0.3, shape=(4,), dtype=np.float32)
        # discrete action set: +/-0.1 thrust along x and y, two combined x/y actions, and a no-op
        self.action_space = []
        for i in range(2):
            for k in range(2):
                a = np.zeros(4)
                a[i] = 0.1 * ((-1) ** k)
                self.action_space.append(a)
        self.action_space.append(np.array([-0.1, 0.1, 0., 0.]))
        self.action_space.append(np.array([0.1, -0.1, 0., 0.]))
        self.action_space.append(np.array([0., 0., 0., 0.]))
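        # Resulting discrete action set (7 entries):
        #   [ 0.1, 0, 0, 0], [-0.1, 0, 0, 0], [0, 0.1, 0, 0], [0, -0.1, 0, 0],
        #   [-0.1, 0.1, 0, 0], [0.1, -0.1, 0, 0], [0, 0, 0, 0]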
        # 13 states: [x, y, z, vx, vy, vz, qx, qy, qz, qw, wx, wy, wz]
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(13,), dtype=np.float32)
        # number of timesteps in one episode
        self.episode_length = equations.num_steps
        # initial state
        self.state = np.zeros(13)
    def reset(self):
        # reset the state to the initial state
        self.state = np.zeros(13)
        self.state[0] = np.random.rand() * 500          # initial x position
        self.state[1] = 10000 + np.random.rand() * 500  # initial y position
        self.state[2] = np.random.rand() * 500          # initial z position
        self.state[6] = 1                               # initial quaternion
        self.state[10] = 1e-5                           # initial angular velocity
        self.state[11] = 1e-5
        self.state[12] = 1e-5
        self.episode_length = equations.num_steps
        return self.state
    def step(self, action):
        p0, v, q, w = self.state[:3], self.state[3:6], self.state[6:10], self.state[10:]
        Fx, Fy, Fz, L = action
        F = np.array([Fx, Fy, Fz])
        L = np.array([0, 0, L])  # torque is applied about the z axis only
        # propagate relative position/velocity with the Clohessy-Wiltshire finite-difference model
        p, v = equations.CW_finite_diff(p0, v, equations.n0, equations.dt, F)
        # attitude propagation is currently disabled
        '''q, w = equations.q_and_w(q, w, L, equations.dt)'''
        q, w = [0, 0, 0, 0], [0, 0, 0]
        self.state = np.concatenate((p, v, q, w))
        self.episode_length -= 1
        done = self.episode_length == 0
        reward = self.reward(p0)
        return self.state, reward, done
    def reward(self, p0):
        # reward function
        r = 0
        # on y axis
        '''if self.state[1] > 25:
            r -= abs(self.state[1])/500  # for y, needs to come close but not too much
        elif self.state[1] < 1000 and self.state[1] > 10:
            r = 1
            if self.state[1] < 500:
                r += 2
            if self.state[1] < 100:
                r += 3'''
        '''# stay on the orbit for the x and z axes
        if self.state[0] > 200:
            r -= (self.state[0] - 100)/10
        elif self.state[2] > 200:
            r -= (self.state[2] - 100)/10'''
        '''r += max(-np.linalg.norm(self.state[:3]) + np.linalg.norm(p0), 0)/100  # distance to the satellite'''
        r += np.linalg.norm(self.state[:3])/1000  # distance to the satellite
        reward = r
        # globally, distance to the satellite
        '''if np.linalg.norm(self.state[:3]) > 15 and np.linalg.norm(self.state[:3]) < 50:
            r += 1000'''
        '''if np.linalg.norm(self.state[:3]) < 15:
            r -= 1000'''
        '''# then economy of propellant
        r -= np.linalg.norm(action[:3])'''
        '''# orientation constraint
        q_norm = self.state[6:10] / np.linalg.norm(self.state[6:10])
        d = q_norm[1:]
        dot_product = np.dot(d, self.state[:3])
        norm_r = np.linalg.norm(self.state[:3])
        constraint = -dot_product / norm_r
        constraint -= np.cos(np.deg2rad(20))
        if constraint < 0:
            r -= 50'''
        return reward
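# A minimal reference sketch of one Clohessy-Wiltshire finite-difference step,
# included only for illustration: equations.CW_finite_diff is the actual
# implementation used by Env.step and may differ in axis convention, integrator
# and mass handling. This sketch assumes unit mass, mean motion n and the usual
# convention (x radial, y along-track, z cross-track).
def _cw_step_sketch(p, v, n, dt, F):
    x, y, z = p
    vx, vy, vz = v
    # CW accelerations: ax = 3n^2 x + 2n vy, ay = -2n vx, az = -n^2 z (plus thrust)
    ax = 3 * n**2 * x + 2 * n * vy + F[0]
    ay = -2 * n * vx + F[1]
    az = -n**2 * z + F[2]
    v_new = np.array([vx + ax * dt, vy + ay * dt, vz + az * dt])
    p_new = np.array(p) + v_new * dt  # semi-implicit Euler update
    return p_new, v_new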
'''
# test the environment and visualize the results
env = Env()
print(env.reset())
indices = []
for i in range(40):
    state, r, ok = env.step([0.2, -0.2, 0., 0.])
    print(r)
    indices.append([state[0], state[1], state[2]])
for i in range(20):
    state, r, ok = env.step([-0.2, 0.2, 0., 0.])
    print(r)
    indices.append([state[0], state[1], state[2]])
for i in range(20):
    state, r, ok = env.step([0.2, -0.2, 0., 0.])
    print(r)
    indices.append([state[0], state[1], state[2]])
for i in range(10):
    state, r, ok = env.step([0., -0., 0., 0.])
    print(r)
    indices.append([state[0], state[1], state[2]])
for i in range(30):
    state, r, ok = env.step([0., -0.3, 0., 0.])
    print(r)
    indices.append([state[0], state[1], state[2]])
print(env.action_space)
vis.visualize(indices)'''
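# A minimal rollout sketch (not part of the original test above); it assumes
# equations.py provides num_steps, dt and n0 as used in Env. Uncomment to run
# a short episode with randomly chosen discrete actions.
'''
import random
env = Env()
obs = env.reset()
for _ in range(50):
    action = random.choice(env.action_space)  # one of the 7 discrete actions
    obs, reward, done = env.step(action)
    if done:
        break
print(obs[:3], reward, done)
'''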