Robot.py
import math
import random

import numpy as np

from Maze import Maze


class Robot(object):
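    """
    A tabular Q-learning agent that navigates a Maze.

    The agent keeps a two-level Q-table, Qtable[state][action], follows an
    epsilon-greedy policy while learning, and acts greedily while testing.
    """
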
    def __init__(self, maze, alpha=0.5, gamma=0.9, epsilon0=0.5):
        self.maze = maze
        self.valid_actions = self.maze.valid_actions
        self.state = None
        self.action = None

        # Learning parameters: learning rate, discount factor, and the
        # initial exploration rate of the epsilon-greedy policy.
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon0 = epsilon0
        self.epsilon = epsilon0
        self.t = 0

        # Default to a random policy until set_status() is called, so the
        # attributes used by choose_action() and update() always exist.
        self.learning = False
        self.testing = False

        self.Qtable = {}
        self.reset()

    def reset(self):
        """
        Reset the robot: sense the current state and make sure the
        Q-table has a line for it.
        """
        self.state = self.sense_state()
        self.create_Qtable_line(self.state)

    def set_status(self, learning=False, testing=False):
        """
        Determine whether the robot is learning its Q-table or
        executing the testing procedure.
        """
        self.learning = learning
        self.testing = testing

    def update_parameter(self):
        """
        Update the learning parameters when necessary. While learning,
        epsilon decays exponentially with the step counter t, so the robot
        explores less as training progresses; while testing, no random
        exploration is allowed.
        """
        decay_rate = -0.1
        if self.testing:
            # No random choice when testing.
            self.epsilon = 0
        else:
            # Exponential decay: epsilon = epsilon0 * exp(decay_rate * t).
            self.epsilon = self.epsilon0 * math.exp(decay_rate * self.t)
            self.t += 1
        return self.epsilon

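    # Illustrative decay under the defaults (epsilon0=0.5, decay_rate=-0.1):
    #   t=0  -> epsilon = 0.500
    #   t=10 -> epsilon ~ 0.184
    #   t=30 -> epsilon ~ 0.025
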
    def sense_state(self):
        """
        Get the current state of the robot from the maze.
        """
        return self.maze.sense_robot()

    def create_Qtable_line(self, state):
        """
        Create a Q-table line for the given state if one does not already
        exist. The Q-table is a two-level dict,
        Qtable[state] = {'u': xx, 'd': xx, ...},
        with one entry per valid action, initialised to 0.0.
        """
        self.Qtable.setdefault(state, {a: 0.0 for a in self.valid_actions})
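
    # For example, assuming the maze reports states as coordinate tuples,
    # a freshly created line looks like:
    #   self.Qtable[(2, 3)] == {'u': 0.0, 'd': 0.0, 'l': 0.0, 'r': 0.0}
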
    def choose_action(self):
        """
        Return an action according to the current policy: epsilon-greedy
        while learning, purely greedy while testing, and uniformly random
        otherwise.
        """
        def is_random_exploration():
            # Explore with probability epsilon: draw a uniform random
            # number in [0, 1) and compare it with epsilon.
            return np.random.uniform() < self.epsilon

        if self.learning:
            if is_random_exploration():
                # Explore: choose a random action.
                return random.choice(self.valid_actions)
            else:
                # Exploit: choose the action with the highest Q value.
                return max(self.Qtable[self.state], key=self.Qtable[self.state].get)
        elif self.testing:
            # Always choose the action with the highest Q value.
            return max(self.Qtable[self.state], key=self.Qtable[self.state].get)
        else:
            # Neither learning nor testing: act randomly.
            return random.choice(self.valid_actions)

    def update_Qtable(self, r, action, next_state):
        """
        Update the Q-table with the standard Q-learning rule:
        Q(s, a) <- (1 - alpha) * Q(s, a) + alpha * (r + gamma * max_a' Q(s', a'))
        """
        if self.learning:
            max_next_q = max(self.Qtable[next_state].values())
            self.Qtable[self.state][action] = (
                (1 - self.alpha) * self.Qtable[self.state][action]
                + self.alpha * (r + self.gamma * max_next_q)
            )
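
    # Worked example (illustrative numbers): with alpha=0.5, gamma=0.9,
    # Q(s, a)=0.0, r=-0.1, and max_a' Q(s', a')=1.0, the update gives
    #   Q(s, a) <- 0.5 * 0.0 + 0.5 * (-0.1 + 0.9 * 1.0) = 0.4
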
    def update(self):
        """
        Perform one step of the robot's sense-act-learn loop. Called once
        per step in every epoch of training or testing. Returns the action
        taken and the reward received.
        """
        self.state = self.sense_state()       # get the current state
        self.create_Qtable_line(self.state)   # ensure a Q-table line for this state
        action = self.choose_action()         # choose an action for this state
        reward = self.maze.move_robot(action) # move the robot and collect the reward
        next_state = self.sense_state()       # get the next state
        self.create_Qtable_line(next_state)   # ensure a Q-table line for the next state
        if self.learning and not self.testing:
            self.update_Qtable(reward, action, next_state)  # update the Q-table
            self.update_parameter()                         # decay epsilon
        return action, reward
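

# ---------------------------------------------------------------------------
# Minimal usage sketch (an assumption, not part of the original module): only
# maze.valid_actions, maze.sense_robot(), and maze.move_robot() are exercised
# by Robot above, so the Maze() constructor call below may need arguments to
# match the real Maze signature.
if __name__ == "__main__":
    maze = Maze()  # assumed constructor; adjust to the real Maze API
    robot = Robot(maze, alpha=0.5, gamma=0.9, epsilon0=0.5)

    # Training: epsilon-greedy exploration with a decaying epsilon.
    robot.set_status(learning=True, testing=False)
    for step in range(300):  # arbitrary number of training steps
        robot.update()

    # Testing: act greedily on the learned Q-table.
    robot.set_status(learning=False, testing=True)
    action, reward = robot.update()
    print(action, reward)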