# deepql.py
# imports: PyTorch core plus the neural-network, functional, and optimizer modules
import torch as T
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


# MLP for deep Q-learning over a discrete action space
class DeepQN(nn.Module):
    def __init__(self, lr, dim_input, fc1_d, fc2_d, actions_output):
        super(DeepQN, self).__init__()
        self.lr = lr
        self.dim_input = dim_input
        self.fc1_d = fc1_d
        self.fc2_d = fc2_d
        self.actions_output = actions_output
        self.device = T.device("cuda" if T.cuda.is_available() else "cpu")
        # layers
        self.fc1 = nn.Linear(self.dim_input, self.fc1_d)
        self.fc2 = nn.Linear(self.fc1_d, self.fc2_d)
        self.fc3 = nn.Linear(self.fc2_d, self.actions_output)
        # optimizer and loss
        self.optimizer = optim.Adam(self.parameters(), lr=self.lr)
        self.loss = nn.MSELoss()
        # move all parameters to the selected device
        self.to(self.device)

    # forward pass: two ReLU hidden layers, linear Q-value head
    def forward(self, state):
        r1 = F.relu(self.fc1(state))
        r2 = F.relu(self.fc2(r1))
        actions = self.fc3(r2)
        return actions
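

# A minimal usage sketch (not part of the original file): one DQN-style TD
# update with DeepQN. The batch size, dimensions (8 state features, 4 discrete
# actions), hyperparameters, and random tensors are illustrative assumptions.
def _deepqn_td_step_sketch():
    q_net = DeepQN(lr=1e-3, dim_input=8, fc1_d=64, fc2_d=64, actions_output=4)
    states = T.randn(32, 8).to(q_net.device)          # assumed transition batch
    actions = T.randint(0, 4, (32,)).to(q_net.device)
    rewards = T.randn(32).to(q_net.device)
    next_states = T.randn(32, 8).to(q_net.device)
    gamma = 0.99
    # Q(s, a) for the actions actually taken
    q_pred = q_net(states).gather(1, actions.unsqueeze(1)).squeeze(1)
    # TD target: r + gamma * max_a' Q(s', a'), held fixed during the update
    with T.no_grad():
        q_next = q_net(next_states).max(dim=1)[0]
        q_target = rewards + gamma * q_next
    loss = q_net.loss(q_pred, q_target)
    q_net.optimizer.zero_grad()
    loss.backward()
    q_net.optimizer.step()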


# variant of DeepQN with a fixed four-neuron output head
class ContinuousDeepQN(nn.Module):
    def __init__(self, lr, dim_input, fc1_d, fc2_d):
        super(ContinuousDeepQN, self).__init__()
        self.lr = lr
        self.dim_input = dim_input
        self.fc1_d = fc1_d
        self.fc2_d = fc2_d
        self.device = T.device("cuda" if T.cuda.is_available() else "cpu")
        # layers
        self.fc1 = nn.Linear(self.dim_input, self.fc1_d)
        self.fc2 = nn.Linear(self.fc1_d, self.fc2_d)
        # output layer has 4 neurons, one for each action
        self.fc3 = nn.Linear(self.fc2_d, 4)
        # optimizer and loss
        self.optimizer = optim.Adam(self.parameters(), lr=self.lr)
        self.loss = nn.MSELoss()
        # move all parameters to the selected device
        self.to(self.device)

    def forward(self, state):
        r1 = F.relu(self.fc1(state))
        r2 = F.relu(self.fc2(r1))
        # the network outputs 4 values, one per action
        actions = self.fc3(r2)
        return actions
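

# A brief sketch (not part of the original file) of a forward pass through
# ContinuousDeepQN; the 8-dimensional input is an illustrative assumption.
def _continuous_deepqn_forward_sketch():
    net = ContinuousDeepQN(lr=1e-3, dim_input=8, fc1_d=64, fc2_d=64)
    state = T.randn(1, 8).to(net.device)
    values = net(state)  # shape (1, 4): one value per fixed action head
    return values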


# deterministic policy network: maps states to tanh-bounded actions
class Actor(nn.Module):
    def __init__(self, lr, dim_input, fc1_d, fc2_d, dim_action_output):
        super(Actor, self).__init__()
        self.lr = lr
        self.dim_input = dim_input
        self.fc1_d = fc1_d
        self.fc2_d = fc2_d
        self.dim_action_output = dim_action_output
        self.device = T.device("cuda" if T.cuda.is_available() else "cpu")
        # layers
        self.fc1 = nn.Linear(self.dim_input, self.fc1_d)
        self.fc2 = nn.Linear(self.fc1_d, self.fc2_d)
        self.fc3 = nn.Linear(self.fc2_d, self.dim_action_output)
        # optimizer only: the actor's training signal comes from the critic,
        # so no loss module is defined here
        self.optimizer = optim.Adam(self.parameters(), lr=self.lr)
        # move all parameters to the selected device
        self.to(self.device)

    def forward(self, state):
        r1 = F.relu(self.fc1(state))
        r2 = F.relu(self.fc2(r1))
        # tanh bounds each action component to [-1, 1]
        actions = T.tanh(self.fc3(r2))
        return actions
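

# A minimal sketch (not part of the original file): selecting a continuous
# action and rescaling the tanh output to an environment's action bounds.
# The state size, action size, and max_action value are assumptions.
def _actor_action_sketch():
    actor = Actor(lr=1e-4, dim_input=8, fc1_d=64, fc2_d=64, dim_action_output=2)
    state = T.randn(1, 8).to(actor.device)
    max_action = 2.0                        # assumed per-dimension action bound
    with T.no_grad():
        action = max_action * actor(state)  # tanh output in [-1, 1], rescaled
    return action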


# Q-value network: scores a (state, action) pair with a single scalar
class Critic(nn.Module):
    def __init__(self, lr, dim_input, fc1_d, fc2_d, dim_action_input):
        super(Critic, self).__init__()
        self.lr = lr
        self.dim_input = dim_input
        self.fc1_d = fc1_d
        self.fc2_d = fc2_d
        self.dim_action_input = dim_action_input
        self.device = T.device("cuda" if T.cuda.is_available() else "cpu")
        # layers: the first layer takes the concatenated state and action
        self.fc1 = nn.Linear(self.dim_input + self.dim_action_input, self.fc1_d)
        self.fc2 = nn.Linear(self.fc1_d, self.fc2_d)
        self.fc3 = nn.Linear(self.fc2_d, 1)
        # optimizer and loss
        self.optimizer = optim.Adam(self.parameters(), lr=self.lr)
        self.loss = nn.MSELoss()
        # move all parameters to the selected device
        self.to(self.device)

    def forward(self, state, action):
        # concatenate along the feature dimension before the first layer
        state_action = T.cat([state, action], dim=1)
        r1 = F.relu(self.fc1(state_action))
        r2 = F.relu(self.fc2(r1))
        q_value = self.fc3(r2)
        return q_value
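

# A minimal sketch (not part of the original file) of how Actor and Critic fit
# together in a DDPG-style policy update: the actor is improved by ascending
# the critic's Q-value. Dimensions and hyperparameters are assumptions.
def _actor_critic_update_sketch():
    actor = Actor(lr=1e-4, dim_input=8, fc1_d=64, fc2_d=64, dim_action_output=2)
    critic = Critic(lr=1e-3, dim_input=8, fc1_d=64, fc2_d=64, dim_action_input=2)
    states = T.randn(32, 8).to(actor.device)  # assumed batch of states
    # policy loss: negative mean Q of the actor's actions (gradient ascent on Q)
    actor_loss = -critic(states, actor(states)).mean()
    actor.optimizer.zero_grad()
    actor_loss.backward()
    actor.optimizer.step()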