env.py
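"""Gymnasium-style environment for a spacecraft relative-motion (rendezvous) scenario.

Relative translational motion is propagated with a Clohessy-Wiltshire
finite-difference model (equations.CW_finite_diff); attitude propagation is
currently disabled in step(). The 13-dimensional state is
[x, y, z, vx, vy, vz, qx, qy, qz, qw, wx, wy, wz] and an action is
[Fx, Fy, Fz, L], where L is used as a torque about the z axis.
"""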
import numpy as np
from gymnasium import spaces
import equations
import visualize as vis
# flake8: noqa
class Env():
    def __init__(self):
        # 4-dimensional continuous action: [Fx, Fy, Fz, L]
        self.continuous_action_space = spaces.Box(low=-0.3, high=0.3, shape=(4,), dtype=np.float32)
        # discrete action set: +/-0.1 thrust along x and y, two combined x/y actions, and a no-op
        self.action_space = []
        for i in range(2):
            for k in range(2):
                a = np.zeros(4)
                a[i] = 0.1 * ((-1) ** k)
                self.action_space.append(a)
        self.action_space.append(np.array([-0.1, 0.1, 0., 0.]))
        self.action_space.append(np.array([0.1, -0.1, 0., 0.]))
        self.action_space.append(np.array([0., 0., 0., 0.]))
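        # Resulting discrete action set (7 entries):
        #   [ 0.1, 0, 0, 0], [-0.1, 0, 0, 0], [0, 0.1, 0, 0], [0, -0.1, 0, 0],
        #   [-0.1, 0.1, 0, 0], [0.1, -0.1, 0, 0], [0, 0, 0, 0]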
        # 13 states: [x, y, z, vx, vy, vz, qx, qy, qz, qw, wx, wy, wz]
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(13,), dtype=np.float32)
        # number of timesteps in one episode
        self.episode_length = equations.num_steps
        # initial state
        self.state = np.zeros(13)
    def reset(self):
        # reset the state to the initial state
        self.state = np.zeros(13)
        self.state[0] = np.random.rand() * 500          # initial x position
        self.state[1] = 10000 + np.random.rand() * 500  # initial y position
        self.state[2] = np.random.rand() * 500          # initial z position
        self.state[6] = 1                               # initial quaternion
        self.state[10] = 1e-5                           # initial angular velocity
        self.state[11] = 1e-5
        self.state[12] = 1e-5
        self.episode_length = equations.num_steps
        return self.state
    def step(self, action):
        p0, v, q, w = self.state[:3], self.state[3:6], self.state[6:10], self.state[10:]
        Fx, Fy, Fz, L = action
        F = np.array([Fx, Fy, Fz])
        L = np.array([0, 0, L])  # torque is applied about the z axis only
        # propagate relative position/velocity with the Clohessy-Wiltshire finite-difference model
        p, v = equations.CW_finite_diff(p0, v, equations.n0, equations.dt, F)
        # attitude propagation is currently disabled
        '''q, w = equations.q_and_w(q, w, L, equations.dt)'''
        q, w = [0, 0, 0, 0], [0, 0, 0]
        self.state = np.concatenate((p, v, q, w))
        self.episode_length -= 1
        done = self.episode_length == 0
        reward = self.reward(p0)
        return self.state, reward, done
    def reward(self, p0):
        # reward function
        r = 0
        # on y axis
        '''if self.state[1] > 25:
            r -= abs(self.state[1])/500  # for y, needs to come close but not too much
        elif self.state[1] < 1000 and self.state[1] > 10:
            r = 1
            if self.state[1] < 500:
                r += 2
            if self.state[1] < 100:
                r += 3'''
        '''# stay on the orbit for the x and z axes
        if self.state[0] > 200:
            r -= (self.state[0] - 100)/10
        elif self.state[2] > 200:
            r -= (self.state[2] - 100)/10'''
        '''r += max(-np.linalg.norm(self.state[:3]) + np.linalg.norm(p0), 0)/100  # distance to the satellite'''
        r += np.linalg.norm(self.state[:3])/1000  # distance to the satellite
        reward = r
        # globally, distance to the satellite
        '''if np.linalg.norm(self.state[:3]) > 15 and np.linalg.norm(self.state[:3]) < 50:
            r += 1000'''
        '''if np.linalg.norm(self.state[:3]) < 15:
            r -= 1000'''
        '''# then economy of propellant
        r -= np.linalg.norm(action[:3])'''
        '''# orientation constraint
        q_norm = self.state[6:10] / np.linalg.norm(self.state[6:10])
        d = q_norm[1:]
        dot_product = np.dot(d, self.state[:3])
        norm_r = np.linalg.norm(self.state[:3])
        constraint = -dot_product / norm_r
        constraint -= np.cos(np.deg2rad(20))
        if constraint < 0:
            r -= 50'''
        return reward
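# A minimal reference sketch of one Clohessy-Wiltshire finite-difference step,
# included only for illustration: equations.CW_finite_diff is the actual
# implementation used by Env.step and may differ in axis convention, integrator
# and mass handling. This sketch assumes unit mass, mean motion n and the usual
# convention (x radial, y along-track, z cross-track).
def _cw_step_sketch(p, v, n, dt, F):
    x, y, z = p
    vx, vy, vz = v
    # CW accelerations: ax = 3n^2 x + 2n vy, ay = -2n vx, az = -n^2 z (plus thrust)
    ax = 3 * n**2 * x + 2 * n * vy + F[0]
    ay = -2 * n * vx + F[1]
    az = -n**2 * z + F[2]
    v_new = np.array([vx + ax * dt, vy + ay * dt, vz + az * dt])
    p_new = np.array(p) + v_new * dt  # semi-implicit Euler update
    return p_new, v_new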
'''
# test the environment and visualize the results
env = Env()
print(env.reset())
indices = []
for i in range(40):
    state, r, ok = env.step([0.2, -0.2, 0., 0.])
    print(r)
    indices.append([state[0], state[1], state[2]])
for i in range(20):
    state, r, ok = env.step([-0.2, 0.2, 0., 0.])
    print(r)
    indices.append([state[0], state[1], state[2]])
for i in range(20):
    state, r, ok = env.step([0.2, -0.2, 0., 0.])
    print(r)
    indices.append([state[0], state[1], state[2]])
for i in range(10):
    state, r, ok = env.step([0., -0., 0., 0.])
    print(r)
    indices.append([state[0], state[1], state[2]])
for i in range(30):
    state, r, ok = env.step([0., -0.3, 0., 0.])
    print(r)
    indices.append([state[0], state[1], state[2]])
print(env.action_space)
vis.visualize(indices)'''
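# A minimal rollout sketch (not part of the original test above); it assumes
# equations.py provides num_steps, dt and n0 as used in Env. Uncomment to run
# a short episode with randomly chosen discrete actions.
'''
import random
env = Env()
obs = env.reset()
for _ in range(50):
    action = random.choice(env.action_space)  # one of the 7 discrete actions
    obs, reward, done = env.step(action)
    if done:
        break
print(obs[:3], reward, done)
'''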