-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathAgent.py
189 lines (159 loc) · 8.65 KB
/
Agent.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
from ChromeNGL import ChromeNGL
from utils import parse_action
import time
import json
import torch
class Agent:
    """Drive a Neuroglancer viewer through a ``ChromeNGL`` browser session.

    The agent can either let a model choose the next action (``decision``)
    or replay a previously recorded episode step by step
    (``follow_episode``).
    """

    # Factors used to map the model's mouse coordinates back into the
    # viewer's action space (presumably the window size in pixels, with the
    # model emitting coordinates normalised to [0, 1] -- TODO confirm).
    VIEWPORT_WIDTH = 1200
    VIEWPORT_HEIGHT = 900

    def __init__(self, model=None, start_session: bool = False):
        """Create the agent and optionally open a browser session.

        Args:
            model: Object exposing ``action(pos_state, image)`` and
                ``build_output_vector(discrete_probs, continuous_probs)``.
                Only required by ``decision``.
            start_session: When true, build a ``ChromeNGL`` and call its
                ``start_session``; otherwise ``chrome_ngl`` stays ``None``.
        """
        self.action_history = []
        self.state = None
        self.action = None
        self.reward = None
        self.sleep_time = 1  # time between actions
        if start_session:
            self.chrome_ngl = ChromeNGL()
            self.chrome_ngl.start_session()
        else:
            self.chrome_ngl = None
        self.model = model

    @staticmethod
    def _to_scalar(value):
        """Return ``value`` as a plain Python number, unwrapping torch tensors."""
        return value.item() if isinstance(value, torch.Tensor) else value

    def prepare_state(self):
        """Read the viewer state and a screenshot from the browser session.

        Returns:
            A tuple ``(pos_state, curr_image, json_state)`` where
            ``pos_state`` is ``[position, crossSectionScale,
            projectionOrientation, projectionScale]`` taken from the parsed
            JSON state, ``curr_image`` is whatever
            ``chrome_ngl.get_screenshot()`` returns, and ``json_state`` is
            the full parsed state dictionary.
        """
        json_state = json.loads(self.chrome_ngl.get_JSON_state())
        # For now the state given to the model is just these four fields.
        pos_state = [
            json_state["position"],
            json_state["crossSectionScale"],
            json_state["projectionOrientation"],
            json_state["projectionScale"],
        ]
        curr_image = self.chrome_ngl.get_screenshot()
        return pos_state, curr_image, json_state

    def decision(self):
        """Ask the model for an action on the current state and apply it.

        Returns:
            ``(discrete_probs, continuous_probs, output_vector)`` as produced
            by ``model.action`` and ``model.build_output_vector``.
        """
        pos_state, curr_image, json_state = self.prepare_state()

        # MODEL INPUT = [state, image] (or memory, TBD), where state is
        # [position, crossSectionScale, projectionOrientation, projectionScale].
        #
        # Open question: everything should arguably be boolean for RL; make
        # boolean increments for each action?
        #
        # MODEL OUTPUT (18 values, in this order):
        #   left_click, right_click, double_click      3 booleans
        #   x, y                                       2 floats, mouse position
        #   key_Shift, key_Ctrl, key_Alt               3 booleans for keys
        #   json_change                                1 boolean for JSON change
        #   delta_position_x / _y / _z                 3 floats
        #   delta_crossSectionScale                    1 float
        #   delta_projectionOrientation_q1 .. _q4      4 floats
        #   delta_projectionScale                      1 float
        discrete_probs, continuous_probs = self.model.action(pos_state, curr_image)
        output_vector = self.model.build_output_vector(discrete_probs, continuous_probs)

        self.apply_actions(output_vector, json_state)
        return discrete_probs, continuous_probs, output_vector

    def apply_actions(self, output_vector, json_state):
        """Translate a model output vector into a viewer action.

        At most one action is performed, with this priority: left click,
        right click, double click, JSON state change.

        Args:
            output_vector: Iterable of exactly 18 values (Python numbers or
                single-element torch tensors) laid out as described in
                ``decision``. It is never modified.
            json_state: Parsed viewer state. Updated in place with the deltas
                when a JSON change is selected.

        Raises:
            ValueError: If ``output_vector`` does not hold exactly 18 values.
        """
        # Unwrap every entry once. Scaling tensor elements in place would
        # write through to the caller's output_vector (tensor elements are
        # views) and can fail on tensors that track gradients.
        (
            left_click, right_click, double_click,  # 3 booleans
            x, y,  # 2 floats for mouse position
            key_Shift, key_Ctrl, key_Alt,  # 3 booleans for keys
            json_change,  # 1 boolean for JSON change
            delta_position_x, delta_position_y, delta_position_z,  # 3 floats
            delta_crossSectionScale,  # 1 float
            delta_projectionOrientation_q1, delta_projectionOrientation_q2,
            delta_projectionOrientation_q3, delta_projectionOrientation_q4,  # 4 floats
            delta_projectionScale,  # 1 float
        ) = (self._to_scalar(value) for value in output_vector)

        # Fit the mouse position back into the action space.
        x = x * self.VIEWPORT_WIDTH
        y = y * self.VIEWPORT_HEIGHT

        modifiers = [
            name
            for name, pressed in (("Shift", key_Shift), ("Ctrl", key_Ctrl), ("Alt", key_Alt))
            if pressed
        ]
        for name in modifiers:
            print(f"{name} key pressed")
        key_pressed = ", ".join(modifiers)

        if left_click:
            print("Decided to do a left click at position", x, y)
            self.chrome_ngl.mouse_key_action(x, y, "left_click", key_pressed)
        elif right_click:
            print("Decided to do a right click at position", x, y)
            self.chrome_ngl.mouse_key_action(x, y, "right_click", key_pressed)
        elif double_click:
            print("Decided to do a double click at position", x, y)
            self.chrome_ngl.mouse_key_action(x, y, "double_click", key_pressed)
        elif json_change:
            print("Decided to change the JSON state")
            position = json_state["position"]
            position[0] += delta_position_x
            position[1] += delta_position_y
            position[2] += delta_position_z
            json_state["crossSectionScale"] += delta_crossSectionScale
            orientation = json_state["projectionOrientation"]
            orientation[0] += delta_projectionOrientation_q1
            orientation[1] += delta_projectionOrientation_q2
            orientation[2] += delta_projectionOrientation_q3
            orientation[3] += delta_projectionOrientation_q4
            json_state["projectionScale"] += delta_projectionScale
            # NOTE(review): a dict is passed here, while follow_episode passes
            # a JSON string -- confirm change_JSON_state_url accepts both.
            self.chrome_ngl.change_JSON_state_url(json_state)
        print("Decision acted upon")

    def follow_episode(self, episode):
        """Replay a recorded episode in the Neuroglancer viewer step by step.

        At the moment the JSON state is fully replaced on direct JSON
        changes, which is not definitive behavior (sort of cheating).

        Args:
            episode: Sequence of steps, each a dictionary with the keys
                ``state`` and ``action`` (recordings also carry ``time``,
                which is not used here). The first step only provides the
                initial state. An empty episode is a no-op.
        """
        if not episode:
            print("Empty episode, nothing to replay")
            return

        time.sleep(self.sleep_time)
        self.chrome_ngl.change_JSON_state_url(json.dumps(episode[0]["state"]))
        for i, step in enumerate(episode[1:], start=1):
            print("Step: ", i)
            step_action = step["action"]
            print(step_action)
            parsed_action, direct_json_change = parse_action(step_action)
            if direct_json_change:
                self.chrome_ngl.change_JSON_state_url(json.dumps(step["state"]))
            else:
                self.chrome_ngl.mouse_key_action(
                    parsed_action['x'],
                    parsed_action['y'],
                    parsed_action['click_type'],
                    parsed_action['keys_pressed'],
                )

    def reset(self):
        """Clear the action history and start a new Neuroglancer session.

        Requires an existing ``chrome_ngl`` (i.e. ``start_session=True``).
        """
        self.action_history = []
        self.chrome_ngl.start_neuroglancer_session()
if __name__ == "__main__":
    # Replay a recorded episode in a fresh Neuroglancer session.
    # Usage: python Agent.py [path/to/episode.json]
    import sys

    # Original hard-coded recording, kept as the default so running the
    # script without arguments behaves as before.
    default_episode = "/Users/ri5462/Documents/PNI/RLAgent/episodes/episode_1.json"
    file_path = sys.argv[1] if len(sys.argv) > 1 else default_episode

    rl_agent = Agent(start_session=True)
    rl_agent.chrome_ngl.start_neuroglancer_session()
    # Explicit encoding so decoding does not depend on the platform locale.
    with open(file_path, "r", encoding="utf-8") as file:
        data = json.load(file)
    rl_agent.follow_episode(data)
    print("Episode completed")
    # Presumably keeps the browser open for inspection after the replay.
    time.sleep(50)