diff --git a/cfg.py b/cfg.py
index ab7d5873..bfc68e44 100644
--- a/cfg.py
+++ b/cfg.py
@@ -154,7 +154,7 @@ def load_conv(buf, start, conv_model):
     num_w = conv_model.weight.numel()
     num_b = conv_model.bias.numel()
     conv_model.bias.data.copy_(torch.from_numpy(buf[start:start+num_b]));   start = start + num_b
-    conv_model.weight.data.copy_(torch.from_numpy(buf[start:start+num_w])); start = start + num_w
+    conv_model.weight.data.copy_(torch.from_numpy(buf[start:start+num_w]).view_as(conv_model.weight.data)); start = start + num_w
     return start
 
 def save_conv(fp, conv_model):
@@ -172,7 +172,7 @@ def load_conv_bn(buf, start, conv_model, bn_model):
     bn_model.weight.data.copy_(torch.from_numpy(buf[start:start+num_b]));   start = start + num_b
     bn_model.running_mean.copy_(torch.from_numpy(buf[start:start+num_b]));  start = start + num_b
     bn_model.running_var.copy_(torch.from_numpy(buf[start:start+num_b]));   start = start + num_b
-    conv_model.weight.data.copy_(torch.from_numpy(buf[start:start+num_w])); start = start + num_w 
+    conv_model.weight.data.copy_(torch.from_numpy(buf[start:start+num_w]).view_as(conv_model.weight.data)); start = start + num_w
     return start
 
 def save_conv_bn(fp, conv_model, bn_model):
@@ -193,7 +193,7 @@ def load_fc(buf, start, fc_model):
     num_w = fc_model.weight.numel()
     num_b = fc_model.bias.numel()
     fc_model.bias.data.copy_(torch.from_numpy(buf[start:start+num_b]));     start = start + num_b
-    fc_model.weight.data.copy_(torch.from_numpy(buf[start:start+num_w]));   start = start + num_w 
+    fc_model.weight.data.copy_(torch.from_numpy(buf[start:start+num_w]).view_as(fc_model.weight.data));   start = start + num_w
     return start
 
 def save_fc(fp, fc_model):
diff --git a/darknet.py b/darknet.py
index 623da39f..e5cb5dda 100644
--- a/darknet.py
+++ b/darknet.py
@@ -28,10 +28,10 @@ def forward(self, x):
         assert(W % stride == 0)
         ws = stride
         hs = stride
-        x = x.view(B, C, H/hs, hs, W/ws, ws).transpose(3,4).contiguous()
-        x = x.view(B, C, H/hs*W/ws, hs*ws).transpose(2,3).contiguous()
-        x = x.view(B, C, hs*ws, H/hs, W/ws).transpose(1,2).contiguous()
-        x = x.view(B, hs*ws*C, H/hs, W/ws)
+        x = x.view(B, C, H//hs, hs, W//ws, ws).transpose(3,4).contiguous()
+        x = x.view(B, C, H//hs*W//ws, hs*ws).transpose(2,3).contiguous()
+        x = x.view(B, C, hs*ws, H//hs, W//ws).transpose(1,2).contiguous()
+        x = x.view(B, hs*ws*C, H//hs, W//ws)
         return x
 
 class GlobalAvgPool2d(nn.Module):
@@ -146,7 +146,7 @@ def create_network(self, blocks):
                 kernel_size = int(block['size'])
                 stride = int(block['stride'])
                 is_pad = int(block['pad'])
-                pad = (kernel_size-1)/2 if is_pad else 0
+                pad = (kernel_size-1)//2 if is_pad else 0
                 activation = block['activation']
                 model = nn.Sequential()
                 if batch_normalize:
diff --git a/region_loss.py b/region_loss.py
index b2f9f914..1e0548ab 100644
--- a/region_loss.py
+++ b/region_loss.py
@@ -63,7 +63,7 @@ def build_targets(pred_corners, target, anchors, num_anchors, num_classes, nH, n
             gy8 = target[b][t*21+18]*nH
 
             cur_gt_corners = torch.FloatTensor([gx0/nW,gy0/nH,gx1/nW,gy1/nH,gx2/nW,gy2/nH,gx3/nW,gy3/nH,gx4/nW,gy4/nH,gx5/nW,gy5/nH,gx6/nW,gy6/nH,gx7/nW,gy7/nH,gx8/nW,gy8/nH]).repeat(nAnchors,1).t() # 16 x nAnchors
-            cur_confs  = torch.max(cur_confs, corner_confidences9(cur_pred_corners, cur_gt_corners)) # some irrelevant areas are filtered, in the same grid multiple anchor boxes might exceed the threshold
+            cur_confs  = torch.max(cur_confs, corner_confidences9(cur_pred_corners, cur_gt_corners)).view_as(conf_mask[b]) # some irrelevant areas are filtered, in the same grid multiple anchor boxes might exceed the threshold
         conf_mask[b][cur_confs>sil_thresh] = 0
     if seen < -1:#6400:
        tx0.fill_(0.5)
@@ -202,24 +202,24 @@ def forward(self, output, target):
         pred_corners = torch.cuda.FloatTensor(18, nB*nA*nH*nW)
         grid_x = torch.linspace(0, nW-1, nW).repeat(nH,1).repeat(nB*nA, 1, 1).view(nB*nA*nH*nW).cuda()
         grid_y = torch.linspace(0, nH-1, nH).repeat(nW,1).t().repeat(nB*nA, 1, 1).view(nB*nA*nH*nW).cuda()
-        pred_corners[0]  = (x0.data + grid_x) / nW
-        pred_corners[1]  = (y0.data + grid_y) / nH
-        pred_corners[2]  = (x1.data + grid_x) / nW
-        pred_corners[3]  = (y1.data + grid_y) / nH
-        pred_corners[4]  = (x2.data + grid_x) / nW
-        pred_corners[5]  = (y2.data + grid_y) / nH
-        pred_corners[6]  = (x3.data + grid_x) / nW
-        pred_corners[7]  = (y3.data + grid_y) / nH
-        pred_corners[8]  = (x4.data + grid_x) / nW
-        pred_corners[9]  = (y4.data + grid_y) / nH
-        pred_corners[10]  = (x5.data + grid_x) / nW
-        pred_corners[11]  = (y5.data + grid_y) / nH
-        pred_corners[12] = (x6.data + grid_x) / nW
-        pred_corners[13] = (y6.data + grid_y) / nH
-        pred_corners[14] = (x7.data + grid_x) / nW
-        pred_corners[15] = (y7.data + grid_y) / nH
-        pred_corners[16] = (x8.data + grid_x) / nW
-        pred_corners[17] = (y8.data + grid_y) / nH
+        pred_corners[0]  = (x0.data.view(-1) + grid_x) / nW
+        pred_corners[1]  = (y0.data.view(-1) + grid_y) / nH
+        pred_corners[2]  = (x1.data.view(-1) + grid_x) / nW
+        pred_corners[3]  = (y1.data.view(-1) + grid_y) / nH
+        pred_corners[4]  = (x2.data.view(-1) + grid_x) / nW
+        pred_corners[5]  = (y2.data.view(-1) + grid_y) / nH
+        pred_corners[6]  = (x3.data.view(-1) + grid_x) / nW
+        pred_corners[7]  = (y3.data.view(-1) + grid_y) / nH
+        pred_corners[8]  = (x4.data.view(-1) + grid_x) / nW
+        pred_corners[9]  = (y4.data.view(-1) + grid_y) / nH
+        pred_corners[10]  = (x5.data.view(-1) + grid_x) / nW
+        pred_corners[11]  = (y5.data.view(-1) + grid_y) / nH
+        pred_corners[12] = (x6.data.view(-1) + grid_x) / nW
+        pred_corners[13] = (y6.data.view(-1) + grid_y) / nH
+        pred_corners[14] = (x7.data.view(-1) + grid_x) / nW
+        pred_corners[15] = (y7.data.view(-1) + grid_y) / nH
+        pred_corners[16] = (x8.data.view(-1) + grid_x) / nW
+        pred_corners[17] = (y8.data.view(-1) + grid_y) / nH
         gpu_matrix = pred_corners.transpose(0,1).contiguous().view(-1,18)
         pred_corners = convert2cpu(gpu_matrix)
         t2 = time.time()
@@ -248,7 +248,7 @@ def forward(self, output, target):
         tx8        = Variable(tx8.cuda())
         ty8        = Variable(ty8.cuda())
         tconf      = Variable(tconf.cuda())
-        tcls       = Variable(tcls.view(-1)[cls_mask].long().cuda())
+        tcls       = Variable(tcls[cls_mask].long().cuda())
         coord_mask = Variable(coord_mask.cuda())
         conf_mask  = Variable(conf_mask.cuda().sqrt())
         cls_mask   = Variable(cls_mask.view(-1, 1).repeat(1,nC).cuda())
diff --git a/train.py b/train.py
index 66c91a36..f2b86cb4 100644
--- a/train.py
+++ b/train.py
@@ -343,7 +343,7 @@ def truths_length(truths):
     init_height       = model.height
     test_width        = 672
     test_height       = 672
-    init_epoch        = model.seen/nsamples 
+    init_epoch        = model.seen//nsamples
 
     # Variable to save
     training_iters          = []
diff --git a/utils.py b/utils.py
index c4f1b754..af074f6a 100644
--- a/utils.py
+++ b/utils.py
@@ -884,7 +884,7 @@ def get_color(c, x, max_val):
 def read_truths(lab_path):
     if os.path.getsize(lab_path):
         truths = np.loadtxt(lab_path)
-        truths = truths.reshape(truths.size/21, 21) # to avoid single truth problem
+        truths = truths.reshape(truths.size//21, 21) # to avoid single truth problem
         return truths
     else:
         return np.array([])
@@ -1008,7 +1008,7 @@ def file_lines(thefilepath):
         buffer = thefile.read(8192*1024)
         if not buffer:
             break
-        count += buffer.count('\n')
+        count += buffer.count(b'\n')
     thefile.close( )
     return count