forked from andyzeng/visual-pushing-grasping
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
executable file
·325 lines (252 loc) · 13.1 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
import struct
import math
import numpy as np
import cv2
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
def get_pointcloud(color_img, depth_img, camera_intrinsics):
# Get depth image size
im_h = depth_img.shape[0]
im_w = depth_img.shape[1]
# Project depth into 3D point cloud in camera coordinates
pix_x,pix_y = np.meshgrid(np.linspace(0,im_w-1,im_w), np.linspace(0,im_h-1,im_h))
cam_pts_x = np.multiply(pix_x-camera_intrinsics[0][2],depth_img/camera_intrinsics[0][0])
cam_pts_y = np.multiply(pix_y-camera_intrinsics[1][2],depth_img/camera_intrinsics[1][1])
cam_pts_z = depth_img.copy()
cam_pts_x.shape = (im_h*im_w,1)
cam_pts_y.shape = (im_h*im_w,1)
cam_pts_z.shape = (im_h*im_w,1)
# Reshape image into colors for 3D point cloud
rgb_pts_r = color_img[:,:,0]
rgb_pts_g = color_img[:,:,1]
rgb_pts_b = color_img[:,:,2]
rgb_pts_r.shape = (im_h*im_w,1)
rgb_pts_g.shape = (im_h*im_w,1)
rgb_pts_b.shape = (im_h*im_w,1)
cam_pts = np.concatenate((cam_pts_x, cam_pts_y, cam_pts_z), axis=1)
rgb_pts = np.concatenate((rgb_pts_r, rgb_pts_g, rgb_pts_b), axis=1)
return cam_pts, rgb_pts
def get_heightmap(color_img, depth_img, cam_intrinsics, cam_pose, workspace_limits, heightmap_resolution):
# Compute heightmap size
heightmap_size = np.round(((workspace_limits[1][1] - workspace_limits[1][0])/heightmap_resolution, (workspace_limits[0][1] - workspace_limits[0][0])/heightmap_resolution)).astype(int)
# Get 3D point cloud from RGB-D images
surface_pts, color_pts = get_pointcloud(color_img, depth_img, cam_intrinsics)
# Transform 3D point cloud from camera coordinates to robot coordinates
surface_pts = np.transpose(np.dot(cam_pose[0:3,0:3],np.transpose(surface_pts)) + np.tile(cam_pose[0:3,3:],(1,surface_pts.shape[0])))
# Sort surface points by z value
sort_z_ind = np.argsort(surface_pts[:,2])
surface_pts = surface_pts[sort_z_ind]
color_pts = color_pts[sort_z_ind]
# Filter out surface points outside heightmap boundaries
heightmap_valid_ind = np.logical_and(np.logical_and(np.logical_and(np.logical_and(surface_pts[:,0] >= workspace_limits[0][0], surface_pts[:,0] < workspace_limits[0][1]), surface_pts[:,1] >= workspace_limits[1][0]), surface_pts[:,1] < workspace_limits[1][1]), surface_pts[:,2] < workspace_limits[2][1])
surface_pts = surface_pts[heightmap_valid_ind]
color_pts = color_pts[heightmap_valid_ind]
# Create orthographic top-down-view RGB-D heightmaps
color_heightmap_r = np.zeros((heightmap_size[0], heightmap_size[1], 1), dtype=np.uint8)
color_heightmap_g = np.zeros((heightmap_size[0], heightmap_size[1], 1), dtype=np.uint8)
color_heightmap_b = np.zeros((heightmap_size[0], heightmap_size[1], 1), dtype=np.uint8)
depth_heightmap = np.zeros(heightmap_size)
heightmap_pix_x = np.floor((surface_pts[:,0] - workspace_limits[0][0])/heightmap_resolution).astype(int)
heightmap_pix_y = np.floor((surface_pts[:,1] - workspace_limits[1][0])/heightmap_resolution).astype(int)
color_heightmap_r[heightmap_pix_y,heightmap_pix_x] = color_pts[:,[0]]
color_heightmap_g[heightmap_pix_y,heightmap_pix_x] = color_pts[:,[1]]
color_heightmap_b[heightmap_pix_y,heightmap_pix_x] = color_pts[:,[2]]
color_heightmap = np.concatenate((color_heightmap_r, color_heightmap_g, color_heightmap_b), axis=2)
depth_heightmap[heightmap_pix_y,heightmap_pix_x] = surface_pts[:,2]
z_bottom = workspace_limits[2][0]
depth_heightmap = depth_heightmap - z_bottom
depth_heightmap[depth_heightmap < 0] = 0
depth_heightmap[depth_heightmap == -z_bottom] = np.nan
return color_heightmap, depth_heightmap
# Save a 3D point cloud to a binary .ply file
def pcwrite(xyz_pts, filename, rgb_pts=None):
assert xyz_pts.shape[1] == 3, 'input XYZ points should be an Nx3 matrix'
if rgb_pts is None:
rgb_pts = np.ones(xyz_pts.shape).astype(np.uint8)*255
assert xyz_pts.shape == rgb_pts.shape, 'input RGB colors should be Nx3 matrix and same size as input XYZ points'
# Write header for .ply file
pc_file = open(filename, 'wb')
pc_file.write('ply\n')
pc_file.write('format binary_little_endian 1.0\n')
pc_file.write('element vertex %d\n' % xyz_pts.shape[0])
pc_file.write('property float x\n')
pc_file.write('property float y\n')
pc_file.write('property float z\n')
pc_file.write('property uchar red\n')
pc_file.write('property uchar green\n')
pc_file.write('property uchar blue\n')
pc_file.write('end_header\n')
# Write 3D points to .ply file
for i in range(xyz_pts.shape[0]):
pc_file.write(bytearray(struct.pack("fffccc",xyz_pts[i][0],xyz_pts[i][1],xyz_pts[i][2],rgb_pts[i][0].tostring(),rgb_pts[i][1].tostring(),rgb_pts[i][2].tostring())))
pc_file.close()
def get_affordance_vis(grasp_affordances, input_images, num_rotations, best_pix_ind):
vis = None
for vis_row in range(num_rotations/4):
tmp_row_vis = None
for vis_col in range(4):
rotate_idx = vis_row*4+vis_col
affordance_vis = grasp_affordances[rotate_idx,:,:]
affordance_vis[affordance_vis < 0] = 0 # assume probability
# affordance_vis = np.divide(affordance_vis, np.max(affordance_vis))
affordance_vis[affordance_vis > 1] = 1 # assume probability
affordance_vis.shape = (grasp_affordances.shape[1], grasp_affordances.shape[2])
affordance_vis = cv2.applyColorMap((affordance_vis*255).astype(np.uint8), cv2.COLORMAP_JET)
input_image_vis = (input_images[rotate_idx,:,:,:]*255).astype(np.uint8)
input_image_vis = cv2.resize(input_image_vis, (0,0), fx=0.5, fy=0.5, interpolation=cv2.INTER_NEAREST)
affordance_vis = (0.5*cv2.cvtColor(input_image_vis, cv2.COLOR_RGB2BGR) + 0.5*affordance_vis).astype(np.uint8)
if rotate_idx == best_pix_ind[0]:
affordance_vis = cv2.circle(affordance_vis, (int(best_pix_ind[2]), int(best_pix_ind[1])), 7, (0,0,255), 2)
if tmp_row_vis is None:
tmp_row_vis = affordance_vis
else:
tmp_row_vis = np.concatenate((tmp_row_vis,affordance_vis), axis=1)
if vis is None:
vis = tmp_row_vis
else:
vis = np.concatenate((vis,tmp_row_vis), axis=0)
return vis
def get_difference(color_heightmap, color_space, bg_color_heightmap):
color_space = np.concatenate((color_space, np.asarray([[0.0, 0.0, 0.0]])), axis=0)
color_space.shape = (color_space.shape[0], 1, 1, color_space.shape[1])
color_space = np.tile(color_space, (1, color_heightmap.shape[0], color_heightmap.shape[1], 1))
# Normalize color heightmaps
color_heightmap = color_heightmap.astype(float)/255.0
color_heightmap.shape = (1, color_heightmap.shape[0], color_heightmap.shape[1], color_heightmap.shape[2])
color_heightmap = np.tile(color_heightmap, (color_space.shape[0], 1, 1, 1))
bg_color_heightmap = bg_color_heightmap.astype(float)/255.0
bg_color_heightmap.shape = (1, bg_color_heightmap.shape[0], bg_color_heightmap.shape[1], bg_color_heightmap.shape[2])
bg_color_heightmap = np.tile(bg_color_heightmap, (color_space.shape[0], 1, 1, 1))
# Compute nearest neighbor distances to key colors
key_color_dist = np.sqrt(np.sum(np.power(color_heightmap - color_space,2), axis=3))
# key_color_dist_prob = F.softmax(Variable(torch.from_numpy(key_color_dist), volatile=True), dim=0).data.numpy()
bg_key_color_dist = np.sqrt(np.sum(np.power(bg_color_heightmap - color_space,2), axis=3))
# bg_key_color_dist_prob = F.softmax(Variable(torch.from_numpy(bg_key_color_dist), volatile=True), dim=0).data.numpy()
key_color_match = np.argmin(key_color_dist, axis=0)
bg_key_color_match = np.argmin(bg_key_color_dist, axis=0)
key_color_match[key_color_match == color_space.shape[0] - 1] = color_space.shape[0] + 1
bg_key_color_match[bg_key_color_match == color_space.shape[0] - 1] = color_space.shape[0] + 2
return np.sum(key_color_match == bg_key_color_match).astype(float)/np.sum(bg_key_color_match < color_space.shape[0]).astype(float)
# Get rotation matrix from euler angles
def euler2rotm(theta):
R_x = np.array([[1, 0, 0 ],
[0, math.cos(theta[0]), -math.sin(theta[0]) ],
[0, math.sin(theta[0]), math.cos(theta[0]) ]
])
R_y = np.array([[math.cos(theta[1]), 0, math.sin(theta[1]) ],
[0, 1, 0 ],
[-math.sin(theta[1]), 0, math.cos(theta[1]) ]
])
R_z = np.array([[math.cos(theta[2]), -math.sin(theta[2]), 0],
[math.sin(theta[2]), math.cos(theta[2]), 0],
[0, 0, 1]
])
R = np.dot(R_z, np.dot( R_y, R_x ))
return R
# Checks if a matrix is a valid rotation matrix.
def isRotm(R) :
Rt = np.transpose(R)
shouldBeIdentity = np.dot(Rt, R)
I = np.identity(3, dtype = R.dtype)
n = np.linalg.norm(I - shouldBeIdentity)
return n < 1e-6
# Calculates rotation matrix to euler angles
def rotm2euler(R) :
assert(isRotm(R))
sy = math.sqrt(R[0,0] * R[0,0] + R[1,0] * R[1,0])
singular = sy < 1e-6
if not singular :
x = math.atan2(R[2,1] , R[2,2])
y = math.atan2(-R[2,0], sy)
z = math.atan2(R[1,0], R[0,0])
else :
x = math.atan2(-R[1,2], R[1,1])
y = math.atan2(-R[2,0], sy)
z = 0
return np.array([x, y, z])
def angle2rotm(angle, axis, point=None):
# Copyright (c) 2006-2018, Christoph Gohlke
sina = math.sin(angle)
cosa = math.cos(angle)
axis = axis/np.linalg.norm(axis)
# Rotation matrix around unit vector
R = np.diag([cosa, cosa, cosa])
R += np.outer(axis, axis) * (1.0 - cosa)
axis *= sina
R += np.array([[ 0.0, -axis[2], axis[1]],
[ axis[2], 0.0, -axis[0]],
[-axis[1], axis[0], 0.0]])
M = np.identity(4)
M[:3, :3] = R
if point is not None:
# Rotation not around origin
point = np.array(point[:3], dtype=np.float64, copy=False)
M[:3, 3] = point - np.dot(R, point)
return M
def rotm2angle(R):
# From: euclideanspace.com
epsilon = 0.01 # Margin to allow for rounding errors
epsilon2 = 0.1 # Margin to distinguish between 0 and 180 degrees
assert(isRotm(R))
if ((abs(R[0][1]-R[1][0])< epsilon) and (abs(R[0][2]-R[2][0])< epsilon) and (abs(R[1][2]-R[2][1])< epsilon)):
# Singularity found
# First check for identity matrix which must have +1 for all terms in leading diagonaland zero in other terms
if ((abs(R[0][1]+R[1][0]) < epsilon2) and (abs(R[0][2]+R[2][0]) < epsilon2) and (abs(R[1][2]+R[2][1]) < epsilon2) and (abs(R[0][0]+R[1][1]+R[2][2]-3) < epsilon2)):
# this singularity is identity matrix so angle = 0
return [0,1,0,0] # zero angle, arbitrary axis
# Otherwise this singularity is angle = 180
angle = np.pi
xx = (R[0][0]+1)/2
yy = (R[1][1]+1)/2
zz = (R[2][2]+1)/2
xy = (R[0][1]+R[1][0])/4
xz = (R[0][2]+R[2][0])/4
yz = (R[1][2]+R[2][1])/4
if ((xx > yy) and (xx > zz)): # R[0][0] is the largest diagonal term
if (xx< epsilon):
x = 0
y = 0.7071
z = 0.7071
else:
x = np.sqrt(xx)
y = xy/x
z = xz/x
elif (yy > zz): # R[1][1] is the largest diagonal term
if (yy< epsilon):
x = 0.7071
y = 0
z = 0.7071
else:
y = np.sqrt(yy)
x = xy/y
z = yz/y
else: # R[2][2] is the largest diagonal term so base result on this
if (zz< epsilon):
x = 0.7071
y = 0.7071
z = 0
else:
z = np.sqrt(zz)
x = xz/z
y = yz/z
return [angle,x,y,z] # Return 180 deg rotation
# As we have reached here there are no singularities so we can handle normally
s = np.sqrt((R[2][1] - R[1][2])*(R[2][1] - R[1][2]) + (R[0][2] - R[2][0])*(R[0][2] - R[2][0]) + (R[1][0] - R[0][1])*(R[1][0] - R[0][1])) # used to normalise
if (abs(s) < 0.001):
s = 1
# Prevent divide by zero, should not happen if matrix is orthogonal and should be
# Caught by singularity test above, but I've left it in just in case
angle = np.arccos(( R[0][0] + R[1][1] + R[2][2] - 1)/2)
x = (R[2][1] - R[1][2])/s
y = (R[0][2] - R[2][0])/s
z = (R[1][0] - R[0][1])/s
return [angle,x,y,z]
# Cross entropy loss for 2D outputs
class CrossEntropyLoss2d(nn.Module):
def __init__(self, weight=None, size_average=True):
super(CrossEntropyLoss2d, self).__init__()
self.nll_loss = nn.NLLLoss2d(weight, size_average)
def forward(self, inputs, targets):
return self.nll_loss(F.log_softmax(inputs, dim=1), targets)