From f850084194ddccc6d401d5b11f61facc20ec2b75 Mon Sep 17 00:00:00 2001 From: 申瑞珉 (Ruimin Shen) <45179423@qq.com> Date: Mon, 11 Feb 2019 21:29:54 +0800 Subject: [PATCH] Migrate to the PyTorch 0.4 API Drop the deprecated torch.autograd.Variable wrapper and the volatile=True flag: plain tensors are used directly and inference runs under torch.no_grad(). Values are read through .detach() instead of .data, models and tensors are moved with torch.device and .to(device) (replacing utils.ensure_device and the ensure_model helper), the in-place nn.init functions take their trailing-underscore names, feature sizes are probed with a dummy forward pass instead of model.feature_size, the caffe2 import becomes optional, and the requirements are bumped to torch>=0.4.0 and tensorboardX>=1.2. --- convert_caffe_torch.py | 4 ++-- convert_tf_torch.py | 9 ++++----- convert_torch_onnx.py | 2 +- demo_data.py | 5 +++-- demo_label.py | 5 +++-- estimate.py | 27 ++++++++++++++++----------- model/__init__.py | 12 ++---------- model/dnn/inception4.py | 2 +- model/dnn/mobilenet.py | 2 +- model/dnn/mobilenet2.py | 9 ++++----- model/dnn/resnet.py | 2 +- model/stages/openpose.py | 4 ++-- receptive_field_analyzer.py | 10 ++++++---- requirements.txt | 4 ++-- train.py | 39 +++++++++++++------------------------ utils/__init__.py | 6 ------ 16 files changed, 62 insertions(+), 80 deletions(-) diff --git a/convert_caffe_torch.py b/convert_caffe_torch.py index 7c35f88..4d5ac07 100755 --- a/convert_caffe_torch.py +++ b/convert_caffe_torch.py @@ -123,9 +123,9 @@ def main(): stages=inference.stages.state_dict(), ), 0) finally: - for stage, output in enumerate(inference(torch.autograd.Variable(tensor, volatile=True))): + for stage, output in enumerate(inference(tensor)): for name, feature in output.items(): - val = feature.data.numpy() + val = feature.detach().numpy() print('\t'.join(map(str, [ 'stage%d/%s' % (stage, name), 'x'.join(map(str, val.shape)), diff --git a/convert_tf_torch.py b/convert_tf_torch.py index 31e722e..84fc0ed 100755 --- a/convert_tf_torch.py +++ b/convert_tf_torch.py @@ -127,15 +127,14 @@ def main(): 'x'.join(map(str, val.shape)), utils.abs_mean(val), hashlib.md5(val.tostring()).hexdigest(), ]))) - _tensor = torch.autograd.Variable(tensor, volatile=True) - val = dnn(_tensor).data.numpy() + val = dnn(tensor).detach().numpy() print('\t'.join(map(str, [ 'x'.join(map(str, val.shape)), utils.abs_mean(val), hashlib.md5(val.tostring()).hexdigest(), ]))) - for stage, output in enumerate(inference(_tensor)): + for stage, output in enumerate(inference(tensor)): for name, feature in output.items(): - val = feature.data.numpy() + val = feature.detach().numpy() print('\t'.join(map(str, [ 'stage%d/%s' % (stage, name), 'x'.join(map(str, val.shape)), @@ -144,7 +143,7 @@ def main(): forward = inference.forward inference.forward = lambda self, *x: list(forward(self, *x)[-1].values()) with SummaryWriter(model_dir) as writer: - writer.add_graph(inference, (_tensor,)) + writer.add_graph(inference, (tensor,)) def make_args(): diff --git a/convert_torch_onnx.py b/convert_torch_onnx.py index 938a0c7..b605d3f 100755 --- a/convert_torch_onnx.py +++ b/convert_torch_onnx.py @@ -58,7 +58,7 @@ def main(): inference = model.Inference(config, dnn, stages) inference.eval() logging.info(humanize.naturalsize(sum(var.cpu().numpy().nbytes for var in inference.state_dict().values()))) - image = torch.autograd.Variable(torch.randn(args.batch_size, 3, height, width), volatile=True) + image = torch.randn(args.batch_size, 3, height, width) path = model_dir + '.onnx' logging.info('save ' + path) forward = inference.forward diff --git a/demo_data.py b/demo_data.py index 15ae06f..6db1a62 100755 --- a/demo_data.py +++ b/demo_data.py @@ -42,10 +42,11 @@ def main(): utils.modify_config(config, cmd) with open(os.path.expanduser(os.path.expandvars(args.logging)), 'r') as f: logging.config.dictConfig(yaml.load(f)) + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') cache_dir = utils.get_cache_dir(config) _, num_parts = utils.get_dataset_mappers(config) limbs_index = 
utils.get_limbs_index(config) - dnn = utils.parse_attr(config.get('model', 'dnn'))(model.ConfigChannels(config)) + dnn = utils.parse_attr(config.get('model', 'dnn'))(model.ConfigChannels(config)).to(device) draw_points = utils.visualize.DrawPoints(limbs_index, colors=config.get('draw_points', 'colors').split()) _draw_points = utils.visualize.DrawPoints(limbs_index, thickness=1) draw_bbox = utils.visualize.DrawBBox() @@ -63,7 +64,7 @@ def main(): except configparser.NoOptionError: workers = multiprocessing.cpu_count() sizes = utils.train.load_sizes(config) - feature_sizes = [model.feature_size(dnn, *size) for size in sizes] + feature_sizes = [dnn(torch.randn(1, 3, *size).to(device)).size()[-2:] for size in sizes] collate_fn = utils.data.Collate( config, transform.parse_transform(config, config.get('transform', 'resize_train')), diff --git a/demo_label.py b/demo_label.py index 467e92d..a8fefdb 100755 --- a/demo_label.py +++ b/demo_label.py @@ -86,10 +86,11 @@ def main(): utils.modify_config(config, cmd) with open(os.path.expanduser(os.path.expandvars(args.logging)), 'r') as f: logging.config.dictConfig(yaml.load(f)) + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') cache_dir = utils.get_cache_dir(config) _, num_parts = utils.get_dataset_mappers(config) limbs_index = utils.get_limbs_index(config) - dnn = utils.parse_attr(config.get('model', 'dnn'))(model.ConfigChannels(config)) + dnn = utils.parse_attr(config.get('model', 'dnn'))(model.ConfigChannels(config)).to(device) logging.info(humanize.naturalsize(sum(var.cpu().numpy().nbytes for var in dnn.state_dict().values()))) size = tuple(map(int, config.get('image', 'size').split())) draw_points = utils.visualize.DrawPoints(limbs_index, colors=config.get('draw_points', 'colors').split()) @@ -110,7 +111,7 @@ def main(): collate_fn = utils.data.Collate( config, transform.parse_transform(config, config.get('transform', 'resize_train')), - [size], [model.feature_size(dnn, *size)], + [size], [dnn(torch.randn(1, 3, *size).to(device)).size()[-2:]], maintain=config.getint('data', 'maintain'), transform_image=transform.get_transform(config, config.get('transform', 'image_train').split()), ) diff --git a/estimate.py b/estimate.py index 309982b..fa51fea 100755 --- a/estimate.py +++ b/estimate.py @@ -30,8 +30,11 @@ import torch.optim import torch.utils.data import torch.nn as nn -from caffe2.proto import caffe2_pb2 -from caffe2.python import workspace +try: + from caffe2.proto import caffe2_pb2 + from caffe2.python import workspace +except ImportError: + pass import humanize import pybenchmark import cv2 @@ -47,6 +50,7 @@ class Estimate(object): def __init__(self, args, config): self.args = args self.config = config + self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') self.cache_dir = utils.get_cache_dir(config) self.model_dir = utils.get_model_dir(config) _, self.num_parts = utils.get_dataset_mappers(config) @@ -69,7 +73,7 @@ def __init__(self, args, config): with open(os.path.join(self.model_dir, 'predict_net.pb'), 'rb') as f: predict_net.ParseFromString(f.read()) p = workspace.Predictor(init_net, predict_net) - self.inference = lambda tensor: [{'parts': torch.autograd.Variable(torch.from_numpy(parts)), 'limbs': torch.autograd.Variable(torch.from_numpy(limbs))} for parts, limbs in zip(*[iter(p.run([tensor.data.cpu().numpy()]))] * 2)] + self.inference = lambda tensor: [{'parts': torch.from_numpy(parts), 'limbs': torch.from_numpy(limbs)} for parts, limbs in zip(*[iter(p.run([tensor.detach().cpu().numpy()]))] * 
2)] else: self.step, self.epoch, self.dnn, self.stages = self.load() self.inference = model.Inference(config, self.dnn, self.stages) @@ -133,13 +137,13 @@ def __call__(self): image_resized = self.resize(image_bgr, self.height, self.width) image = self.transform_image(image_resized) tensor = self.transform_tensor(image) - tensor = utils.ensure_device(tensor.unsqueeze(0)) - outputs = pybenchmark.profile('inference')(self.inference)(torch.autograd.Variable(tensor, volatile=True)) + tensor = tensor.unsqueeze(0).to(self.device) + outputs = pybenchmark.profile('inference')(self.inference)(tensor) if hasattr(self, 'draw_cluster'): output = outputs[-1] - parts, limbs = (output[name][0].data for name in 'parts, limbs'.split(', ')) + parts, limbs = (output[name][0] for name in 'parts, limbs'.split(', ')) parts = parts[:-1] - parts, limbs = (t.cpu().numpy() for t in (parts, limbs)) + parts, limbs = (t.detach().cpu().numpy() for t in (parts, limbs)) try: interpolation = getattr(cv2, 'INTER_' + self.config.get('estimate', 'interpolation').upper()) parts, limbs = (np.stack([cv2.resize(feature, (self.width, self.height), interpolation=interpolation) for feature in a]) for a in (parts, limbs)) @@ -159,7 +163,7 @@ def __call__(self): image_result = self.draw_cluster(image_result, cluster) else: image_result = image_resized.copy() - feature = self.get_feature(outputs).data.cpu().numpy() + feature = self.get_feature(outputs).detach().cpu().numpy() image_result = self.draw_feature(image_result, feature) if self.args.output: if not hasattr(self, 'writer'): @@ -183,10 +187,11 @@ def main(): utils.modify_config(config, cmd) with open(os.path.expanduser(os.path.expandvars(args.logging)), 'r') as f: logging.config.dictConfig(yaml.load(f)) - detect = Estimate(args, config) + estimate = Estimate(args, config) try: - while detect.cap.isOpened(): - detect() + with torch.no_grad(): + while estimate.cap.isOpened(): + estimate() except KeyboardInterrupt: logging.warning('interrupted') finally: diff --git a/model/__init__.py b/model/__init__.py index c13e7f7..d615bd8 100755 --- a/model/__init__.py +++ b/model/__init__.py @@ -47,14 +47,6 @@ def channel_dict(num_parts, num_limbs): ]) -def feature_size(dnn, height, width): - image = torch.autograd.Variable(torch.randn(1, 3, height, width), volatile=True) - if next(dnn.parameters()).is_cuda: - image = image.cuda() - feature = dnn(image) - return feature.size()[-2:] - - class Inference(nn.Module): def __init__(self, config, dnn, stages): nn.Module.__init__(self) @@ -81,10 +73,10 @@ def __init__(self, config, data, limbs_index, height, width): self.width = width def __call__(self, **kwargs): - mask = torch.autograd.Variable(self.data['mask'].float()) + mask = self.data['mask'].float() batch_size, rows, cols = mask.size() mask = mask.view(batch_size, 1, rows, cols) - data = {name: torch.autograd.Variable(self.data[name]) for name in kwargs} + data = {name: self.data[name] for name in kwargs} return {name: self.loss(mask, data[name], feature) for name, feature in kwargs.items()} def loss(self, mask, label, feature): diff --git a/model/dnn/inception4.py b/model/dnn/inception4.py index 98e6608..c9a88d5 100755 --- a/model/dnn/inception4.py +++ b/model/dnn/inception4.py @@ -319,10 +319,10 @@ def init(self, config_channels): beta = True for m in self.modules(): if isinstance(m, nn.Conv2d): - m.weight = nn.init.kaiming_normal(m.weight) + nn.init.kaiming_normal_(m.weight) elif isinstance(m, nn.BatchNorm2d): m.weight.data.fill_(1) 
m.bias.data.zero_() m.weight.requires_grad = gamma m.bias.requires_grad = beta try: diff --git a/model/dnn/mobilenet.py b/model/dnn/mobilenet.py index 2cfc186..efff980 100755 --- a/model/dnn/mobilenet.py +++ b/model/dnn/mobilenet.py @@ -75,10 +75,10 @@ def __init__(self, config_channels): for m in self.modules(): if isinstance(m, nn.Conv2d): - m.weight = nn.init.kaiming_normal(m.weight) + nn.init.kaiming_normal_(m.weight) elif isinstance(m, nn.BatchNorm2d): m.weight.data.fill_(1) m.bias.data.zero_() def forward(self, x): return self.layers(x) diff --git a/model/dnn/mobilenet2.py b/model/dnn/mobilenet2.py index 775d798..818b4cb 100755 --- a/model/dnn/mobilenet2.py +++ b/model/dnn/mobilenet2.py @@ -117,12 +117,11 @@ def _initialize_weights(self): if m.bias is not None: m.bias.data.zero_() elif isinstance(m, nn.BatchNorm2d): - m.weight.data.fill_(1) - m.bias.data.zero_() + m.weight.data.fill_(1) # keep .data: in-place init on a leaf tensor that requires grad raises an error + m.bias.data.zero_() # keep .data: in-place init on a leaf tensor that requires grad raises an error elif isinstance(m, nn.Linear): - n = m.weight.size(1) - m.weight.data.normal_(0, 0.01) - m.bias.data.zero_() + m.weight.data.normal_(0, 0.01) + m.bias.data.zero_() class MobileNet2Dilate2(MobileNet2): diff --git a/model/dnn/resnet.py b/model/dnn/resnet.py index 1c2add2..85e055c 100755 --- a/model/dnn/resnet.py +++ b/model/dnn/resnet.py @@ -118,10 +118,10 @@ def __init__(self, config_channels, anchors, num_cls, block, layers): for m in self.modules(): if isinstance(m, nn.Conv2d): - m.weight = nn.init.kaiming_normal(m.weight) + nn.init.kaiming_normal_(m.weight) elif isinstance(m, nn.BatchNorm2d): m.weight.data.fill_(1) m.bias.data.zero_() def _make_layer(self, config_channels, prefix, block, channels, blocks, stride=1): layers = [] diff --git a/model/stages/openpose.py b/model/stages/openpose.py index 97e5dc1..b5eff8c 100755 --- a/model/stages/openpose.py +++ b/model/stages/openpose.py @@ -58,10 +58,10 @@ def __init__(self, config_channels, channel_dict, channels_dnn, prefix): def init(self): for m in self.modules(): if isinstance(m, nn.Conv2d): - m.weight = nn.init.xavier_normal(m.weight) + nn.init.xavier_normal_(m.weight) elif isinstance(m, nn.BatchNorm2d): m.weight.data.fill_(1) m.bias.data.zero_() def forward(self, x, **kwargs): return {name: var(x) for name, var in self._modules.items()} @@ -85,10 +85,10 @@ def __init__(self, config_channels, channels, channels_dnn, prefix): def init(self): for m in self.modules(): if isinstance(m, nn.Conv2d): - m.weight = nn.init.xavier_normal(m.weight) + nn.init.xavier_normal_(m.weight) elif isinstance(m, nn.BatchNorm2d): m.weight.data.fill_(1) m.bias.data.zero_() def forward(self, x, **kwargs): x = torch.cat([kwargs[name] for name in ('limbs', 'parts')] + [x], 1) diff --git a/receptive_field_analyzer.py b/receptive_field_analyzer.py index ace2e99..84de093 100755 --- a/receptive_field_analyzer.py +++ b/receptive_field_analyzer.py @@ -54,6 +54,7 @@ class Analyzer(object): def __init__(self, args, config): self.args = args self.config = config + self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') self.model_dir = utils.get_model_dir(config) _, self.num_parts = utils.get_dataset_mappers(config) self.limbs_index = utils.get_limbs_index(config) @@ -64,7 +65,8 @@ def __init__(self, args, config): if torch.cuda.is_available(): self.inference.cuda() self.height, self.width = tuple(map(int, 
config.get('image', 'size').split())) - output = self.dnn(torch.autograd.Variable(utils.ensure_device(torch.zeros(1, 3, self.height, self.width)), volatile=True)) + t = torch.zeros(1, 3, self.height, self.width).to(self.device) + output = self.dnn(t) _, _, self.rows, self.cols = output.size() self.i, self.j = self.rows // 2, self.cols // 2 self.output = output[:, :, self.i, self.j] @@ -83,11 +85,11 @@ def __call__(self): for i, _yx in enumerate(torch.unbind(yx)): y, x = torch.unbind(_yx) tensor[i, :, y, x] = 1 - tensor = utils.ensure_device(tensor) - output = self.dnn(torch.autograd.Variable(tensor, volatile=True)) + tensor = tensor.to(self.device) + output = self.dnn(tensor) output = output[:, :, self.i, self.j] cmp = output == self.output - cmp = torch.prod(cmp, -1).data + cmp = torch.prod(cmp, -1) for _yx, c in zip(torch.unbind(yx), torch.unbind(cmp)): y, x = torch.unbind(_yx) changed[y, x] = c diff --git a/requirements.txt b/requirements.txt index 034129d..b78a9dd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ tqdm pybenchmark graphviz -torch<=0.3.1 +torch>=0.4.0 pandas onnx onnx_caffe2 @@ -17,7 +17,7 @@ Pillow PyQt5 scipy skimage -tensorboardX +tensorboardX>=1.2 tensorflow PyYAML pycocotools diff --git a/train.py b/train.py index 6af109a..7a20654 100755 --- a/train.py +++ b/train.py @@ -55,15 +55,6 @@ #import eval as _eval -def ensure_model(model): - if torch.cuda.is_available(): - model.cuda() - if torch.cuda.device_count() > 1: - logging.info('%d GPUs are used' % torch.cuda.device_count()) - model = nn.DataParallel(model).cuda() - return model - - class SummaryWorker(multiprocessing.Process): def __init__(self, env): super(SummaryWorker, self).__init__() @@ -107,7 +98,7 @@ def run(self): inference = model.Inference(self.config, dnn, stages) forward = inference.forward inference.forward = lambda self, *x: list(forward(self, *x)[-1].values()) - self.writer.add_graph(inference, (torch.autograd.Variable(tensor),)) + self.writer.add_graph(inference, (tensor,)) except: traceback.print_exc() while True: @@ -122,9 +113,9 @@ def run(self): def copy_scalar(self, **kwargs): step, loss_total, losses, losses_hparam = (kwargs[key] for key in 'step, loss_total, losses, losses_hparam'.split(', ')) - loss_total = loss_total.data.clone().cpu().numpy() - losses = [{name: l.data.clone().cpu().numpy() for name, l in loss.items()} for loss in losses] - losses_hparam = [{name: l.data.clone().cpu().numpy() for name, l in loss.items()} for loss in losses_hparam] + loss_total = loss_total.detach().cpu().numpy() + losses = [{name: l.detach().cpu().numpy() for name, l in loss.items()} for loss in losses] + losses_hparam = [{name: l.detach().cpu().numpy() for name, l in loss.items()} for loss in losses_hparam] return dict( step=step, loss_total=loss_total, @@ -144,7 +135,7 @@ def copy_image(self, **kwargs): step, height, width, data, outputs = (kwargs[key] for key in 'step, height, width, data, outputs'.split(', ')) image, mask, keypoints, yx_min, yx_max, parts, limbs, index = (data[key].clone().cpu().numpy() for key in 'image, mask, keypoints, yx_min, yx_max, parts, limbs, index'.split(', ')) output = outputs[self.config.getint('summary_image', 'stage')] - output = {name: output[name].data.clone().cpu().numpy() for name in self.config.get('summary_image', 'output').split()} + output = {name: output[name].detach().cpu().numpy() for name in self.config.get('summary_image', 'output').split()} return dict( step=step, height=height, width=width, image=image, mask=mask, keypoints=keypoints, 
yx_min=yx_min, yx_max=yx_max, parts=parts, limbs=limbs, index=index, @@ -233,6 +224,7 @@ class Train(object): def __init__(self, args, config): self.args = args self.config = config + self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') self.model_dir = utils.get_model_dir(config) self.cache_dir = utils.get_cache_dir(config) _, self.num_parts = utils.get_dataset_mappers(config) @@ -253,7 +245,7 @@ def __init__(self, args, config): path = os.path.expanduser(os.path.expandvars(self.args.finetune)) logging.info('finetune from ' + path) self.finetune(self.dnn, path) - self.inference = ensure_model(self.inference) + self.inference = self.inference.to(self.device) self.inference.train() self.optimizer = eval(self.config.get('train', 'optimizer'))(filter(lambda p: p.requires_grad, self.inference.parameters()), self.args.learning_rate) @@ -282,12 +274,10 @@ def get_loader(self, dnn): logging.info('num_examples=%d' % len(dataset)) try: workers = self.config.getint('data', 'workers') - if torch.cuda.is_available(): - workers = workers * torch.cuda.device_count() except configparser.NoOptionError: workers = multiprocessing.cpu_count() sizes = utils.train.load_sizes(self.config) - feature_sizes = [model.feature_size(dnn, *size) for size in sizes] + feature_sizes = [dnn(torch.randn(1, 3, *size).to(self.device)).size()[-2:] for size in sizes] collate_fn = utils.data.Collate( self.config, transform.parse_transform(self.config, self.config.get('transform', 'resize_train')), @@ -297,7 +287,7 @@ def get_loader(self, dnn): transform_tensor=transform.get_transform(self.config, self.config.get('transform', 'tensor').split()), dir=os.path.join(self.model_dir, 'exception'), ) - return torch.utils.data.DataLoader(dataset, batch_size=self.args.batch_size * torch.cuda.device_count() if torch.cuda.is_available() else self.args.batch_size, shuffle=True, num_workers=workers, collate_fn=collate_fn, pin_memory=torch.cuda.is_available()) + return torch.utils.data.DataLoader(dataset, batch_size=self.args.batch_size, shuffle=True, num_workers=workers, collate_fn=collate_fn, pin_memory=torch.cuda.is_available()) def load(self): try: @@ -341,8 +331,8 @@ def iterate(self, data): for key in data: t = data[key] if torch.is_tensor(t): - data[key] = utils.ensure_device(t) - tensor = torch.autograd.Variable(data['tensor']) + data[key] = t.to(self.device) + tensor = data['tensor'] outputs = pybenchmark.profile('inference')(self.inference)(tensor) height, width = data['image'].size()[1:3] loss = pybenchmark.profile('loss')(model.Loss(self.config, data, self.limbs_index, height, width)) @@ -406,14 +396,14 @@ def __call__(self): def check_nan(self, **kwargs): step, loss_total, losses, data = (kwargs[key] for key in 'step, loss_total, losses, data'.split(', ')) - if np.isnan(loss_total.data.cpu()[0]): + if np.isnan(loss_total.item()): dump_dir = os.path.join(self.model_dir, str(step)) os.makedirs(dump_dir, exist_ok=True) torch.save({name: collections.OrderedDict([(key, var.cpu()) for key, var in getattr(self, name).state_dict().items()]) for name in 'dnn, stages'.split(', ')}, os.path.join(dump_dir, 'model.pth')) torch.save(data, os.path.join(dump_dir, 'data.pth')) for i, loss in enumerate(losses): for name, l in loss.items(): - logging.warning('%s%d=%f' % (name, i, l.data.cpu()[0])) + logging.warning('%s%d=%f' % (name, i, l.item())) raise OverflowError('NaN loss detected, dump runtime information into ' + dump_dir) def save(self, **kwargs): @@ -431,8 +421,7 @@ def eval(self, **kwargs): self.backup_best(cls_ap, 
e.path) except: traceback.print_exc() - if torch.cuda.is_available(): - self.inference.cuda() + self.inference = self.inference.to(self.device) def backup_best(self, cls_ap, path): try: diff --git a/utils/__init__.py b/utils/__init__.py index e9dbb35..93d49ee 100755 --- a/utils/__init__.py +++ b/utils/__init__.py @@ -149,12 +149,6 @@ def modify_config(config, cmd): pass -def ensure_device(t, device_id=None, async=False): - if torch.cuda.is_available(): - t = t.cuda(device_id, async) - return t - - def dense(var): return [torch.mean(torch.abs(x)) if torch.is_tensor(x) else np.abs(x) for x in var]
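
Note: nearly every hunk above is an instance of one recurring PyTorch 0.3 to 0.4 migration pattern; the rest is housekeeping (the optional caffe2 import and the requirement bumps). A minimal, self-contained sketch of that pattern follows; the Conv2d module is a stand-in for illustration, not a model from this repository:

    import torch
    import torch.nn as nn

    net = nn.Conv2d(3, 8, 3, padding=1)  # stand-in for the repository's dnn/stages modules

    # torch.device and .to(device) replace utils.ensure_device / ensure_model and bare .cuda() calls
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    net = net.to(device)
    tensor = torch.randn(1, 3, 32, 32).to(device)  # a plain tensor; no torch.autograd.Variable wrapper

    with torch.no_grad():  # replaces volatile=True
        output = net(tensor)
    val = output.detach().cpu().numpy()  # replaces output.data.cpu().numpy()

    # init functions now carry a trailing underscore; in-place init on a leaf
    # Parameter still goes through .data (or a torch.no_grad() block)
    nn.init.kaiming_normal_(net.weight)
    net.bias.data.zero_()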