In train.py file #35
ModuleNotFoundError: No module named 'model'
I didn't change any code, except line 30:
parser.add_argument('--data_dir',default='./dataset/DukeMTMC_prepare/',type=str,
help='training dir path')
Does the module 'model' have to be downloaded, and what is the path for the model directory?
The model folder is empty. I have downloaded the model folder with files from
https://github.com/layumi/Person_reID_baseline_pytorch, but the error is still the same.
I have executed prepare.py successfully, which created the dataset; now I am trying to run the train file.
I have installed PyTorch using "conda install pytorch torchvision torchaudio cudatoolkit=10.2 -c pytorch".
What else do I need to download?
I have just started learning deep learning; your support is appreciated.
These are screenshots of the files and the project path.
On Thu, Nov 5, 2020 at 7:26 PM Wanggcong wrote:
---ModuleNotFoundError: No module named 'model'
-- Check if you import the model.
# -*- coding: utf-8 -*-
from __future__ import print_function, division
import argparse
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt
from PIL import Image
import time
import os
from model import ft_net, ft_net_dense, PCB
from random_erasing import RandomErasing
import json
######################################################################
# Options
# --------
parser = argparse.ArgumentParser(description='Training')
parser.add_argument('--gpu_ids',default='0', type=str,help='gpu_ids: e.g. 0 0,1,2 0,2')
parser.add_argument('--name',default='ft_ResNet50', type=str, help='output model name')
parser.add_argument('--data_dir',default='./dataset/DukeMTMC_prepare/',type=str, help='training dir path')
parser.add_argument('--train_all', action='store_true', help='use all training data' )
parser.add_argument('--color_jitter', action='store_true', help='use color jitter in training' )
parser.add_argument('--batchsize', default=32, type=int, help='batchsize')
parser.add_argument('--erasing_p', default=0, type=float, help='Random Erasing probability, in [0,1]')
parser.add_argument('--use_dense', action='store_true', help='use densenet121' )
parser.add_argument('--PCB', action='store_true', help='use PCB+ResNet50' )
opt = parser.parse_args()
data_dir = opt.data_dir
name = opt.name
str_ids = opt.gpu_ids.split(',')
gpu_ids = []
for str_id in str_ids:
gid = int(str_id)
if gid >=0:
gpu_ids.append(gid)
# set gpu ids
if len(gpu_ids)>0:
torch.cuda.set_device(gpu_ids[0])
#print(gpu_ids[0])
if not os.path.exists("./model/"):
os.makedirs("./model/")
######################################################################
# Load Data
# ---------
#
transform_train_list = [
#transforms.RandomResizedCrop(size=128, scale=(0.75,1.0), ratio=(0.75,1.3333), interpolation=3), #Image.BICUBIC)
transforms.Resize((288,144), interpolation=3),
transforms.RandomCrop((256,128)),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
]
transform_val_list = [
transforms.Resize(size=(256,128),interpolation=3), #Image.BICUBIC
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
]
if opt.PCB:
transform_train_list = [
transforms.Resize((384,192), interpolation=3),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
]
transform_val_list = [
transforms.Resize(size=(384,192),interpolation=3), #Image.BICUBIC
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
]
if opt.erasing_p>0:
transform_train_list = transform_train_list + [RandomErasing(probability = opt.erasing_p, mean=[0.0, 0.0, 0.0])]
if opt.color_jitter:
transform_train_list = [transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0)] + transform_train_list
print(transform_train_list)
data_transforms = {
'train': transforms.Compose( transform_train_list ),
'val': transforms.Compose(transform_val_list),
}
train_all = ''
if opt.train_all:
train_all = '_all'
image_datasets = {}
image_datasets['train'] = datasets.ImageFolder(os.path.join(data_dir, 'train' + train_all),
data_transforms['train'])
image_datasets['val'] = datasets.ImageFolder(os.path.join(data_dir, 'val'),
data_transforms['val'])
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=opt.batchsize,
shuffle=True, num_workers=16)
for x in ['train', 'val']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
class_names = image_datasets['train'].classes
use_gpu = torch.cuda.is_available()
inputs, classes = next(iter(dataloaders['train']))
######################################################################
# Training the model
# ------------------
#
# Now, let's write a general function to train a model. Here, we will
# illustrate:
#
# - Scheduling the learning rate
# - Saving the best model
#
# In the following, parameter ``scheduler`` is an LR scheduler object from
# ``torch.optim.lr_scheduler``.
y_loss = {} # loss history
y_loss['train'] = []
y_loss['val'] = []
y_err = {}
y_err['train'] = []
y_err['val'] = []
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
since = time.time()
best_model_wts = model.state_dict()
best_acc = 0.0
for epoch in range(num_epochs):
print('Epoch {}/{}'.format(epoch, num_epochs - 1))
print('-' * 10)
# Each epoch has a training and validation phase
for phase in ['train', 'val']:
if phase == 'train':
scheduler.step()
model.train(True) # Set model to training mode
else:
model.train(False) # Set model to evaluate mode
running_loss = 0.0
running_corrects = 0
# Iterate over data.
for data in dataloaders[phase]:
# get the inputs
inputs, labels = data
#print(inputs.shape)
# wrap them in Variable
if use_gpu:
inputs = Variable(inputs.cuda())
labels = Variable(labels.cuda())
else:
inputs, labels = Variable(inputs), Variable(labels)
# zero the parameter gradients
optimizer.zero_grad()
# forward
outputs = model(inputs)
if not opt.PCB:
_, preds = torch.max(outputs.data, 1)
loss = criterion(outputs, labels)
else:
part = {}
sm = nn.Softmax(dim=1)
num_part = 6
for i in range(num_part):
part[i] = outputs[i]
score = sm(part[0]) + sm(part[1]) +sm(part[2]) + sm(part[3]) +sm(part[4]) +sm(part[5])
_, preds = torch.max(score.data, 1)
loss = criterion(part[0], labels)
for i in range(num_part-1):
loss += criterion(part[i+1], labels)
# backward + optimize only if in training phase
if phase == 'train':
loss.backward()
optimizer.step()
# statistics
running_loss += loss.item()
running_corrects += torch.sum(preds == labels.data)
# print(running_corrects)
epoch_loss = running_loss / dataset_sizes[phase]
epoch_acc = (running_corrects.item()) / dataset_sizes[phase]
# print(running_corrects.item())
# print(dataset_sizes[phase])
# print(epoch_acc)
print('{} Loss: {:.4f} Acc: {:.4f}'.format(
phase, epoch_loss, epoch_acc))
y_loss[phase].append(epoch_loss)
y_err[phase].append(1.0-epoch_acc)
# deep copy the model
if phase == 'val':
last_model_wts = model.state_dict()
if epoch%10 == 9:
save_network(model, epoch)
draw_curve(epoch)
print()
time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(
time_elapsed // 60, time_elapsed % 60))
#print('Best val Acc: {:4f}'.format(best_acc))
# load best model weights
model.load_state_dict(last_model_wts)
save_network(model, 'last')
return model
######################################################################
# Draw Curve
#---------------------------
x_epoch = []
fig = plt.figure()
ax0 = fig.add_subplot(121, title="loss")
ax1 = fig.add_subplot(122, title="top1err")
def draw_curve(current_epoch):
x_epoch.append(current_epoch)
ax0.plot(x_epoch, y_loss['train'], 'bo-', label='train')
ax0.plot(x_epoch, y_loss['val'], 'ro-', label='val')
ax1.plot(x_epoch, y_err['train'], 'bo-', label='train')
ax1.plot(x_epoch, y_err['val'], 'ro-', label='val')
if current_epoch == 0:
ax0.legend()
ax1.legend()
fig.savefig( os.path.join('./model',name,'train.jpg'))
######################################################################
# Save model
#---------------------------
def save_network(network, epoch_label):
save_filename = 'net_%s.pth'% epoch_label
save_path = os.path.join('./model',name,save_filename)
torch.save(network.cpu().state_dict(), save_path)
if torch.cuda.is_available():
network.cuda(gpu_ids[0])
######################################################################
# Finetuning the convnet
# ----------------------
#
# Load a pretrainied model and reset final fully connected layer.
#
if opt.use_dense:
model = ft_net_dense(len(class_names))
else:
model = ft_net(len(class_names))
if opt.PCB:
model = PCB(len(class_names))
print(model)
if use_gpu:
model = model.cuda()
criterion = nn.CrossEntropyLoss()
if not opt.PCB:
ignored_params = list(map(id, model.model.fc.parameters() )) + list(map(id, model.classifier.parameters() ))
base_params = filter(lambda p: id(p) not in ignored_params, model.parameters())
optimizer_ft = optim.SGD([
{'params': base_params, 'lr': 0.01},
{'params': model.model.fc.parameters(), 'lr': 0.1},
{'params': model.classifier.parameters(), 'lr': 0.1}
], weight_decay=5e-4, momentum=0.9, nesterov=True)
else:
ignored_params = list(map(id, model.model.fc.parameters() ))
ignored_params += (list(map(id, model.classifier0.parameters() ))
+list(map(id, model.classifier1.parameters() ))
+list(map(id, model.classifier2.parameters() ))
+list(map(id, model.classifier3.parameters() ))
+list(map(id, model.classifier4.parameters() ))
+list(map(id, model.classifier5.parameters() ))
#+list(map(id, model.classifier6.parameters() ))
#+list(map(id, model.classifier7.parameters() ))
)
base_params = filter(lambda p: id(p) not in ignored_params, model.parameters())
optimizer_ft = optim.SGD([
{'params': base_params, 'lr': 0.01},
{'params': model.model.fc.parameters(), 'lr': 0.1},
{'params': model.classifier0.parameters(), 'lr': 0.1},
{'params': model.classifier1.parameters(), 'lr': 0.1},
{'params': model.classifier2.parameters(), 'lr': 0.1},
{'params': model.classifier3.parameters(), 'lr': 0.1},
{'params': model.classifier4.parameters(), 'lr': 0.1},
{'params': model.classifier5.parameters(), 'lr': 0.1},
#{'params': model.classifier6.parameters(), 'lr': 0.01},
#{'params': model.classifier7.parameters(), 'lr': 0.01}
], weight_decay=5e-4, momentum=0.9, nesterov=True)
# Decay LR by a factor of 0.1 every 40 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=40, gamma=0.1)
######################################################################
# Train and evaluate
# ^^^^^^^^^^^^^^^^^^
#
# It should take around 1-2 hours on GPU.
#
dir_name = os.path.join('./model',name)
if not os.path.isdir(dir_name):
os.mkdir(dir_name)
# save opts
with open('%s/opts.json'%dir_name,'w') as fp:
json.dump(vars(opt), fp, indent=1)
model = train_model(model, criterion, optimizer_ft, exp_lr_scheduler,
num_epochs=60)
This error is caused by Python, not deep learning. First, check whether a "model.py" file exists in your folder. If yes, you may try both Python 2 and Python 3.
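For reference, a minimal diagnostic sketch of my own (not from the repo): run it from the directory where you launch train.py to confirm that model.py is actually visible to Python.

# Hypothetical check: is model.py next to the script / on sys.path?
import os
import sys

here = os.getcwd()
print('working dir:', here)
print('model.py present:', os.path.isfile(os.path.join(here, 'model.py')))

# Python resolves `from model import ...` against sys.path; the script's own
# folder is normally on it, but a different launch directory is not.
sys.path.insert(0, here)
import model  # raises ModuleNotFoundError only if model.py really is missing
print('imported model from:', model.__file__)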
Thank you so much, sir, for your help.
I changed line 112 of the code to num_workers=0 and it executed, but then I got this error:
RuntimeError: CUDA out of memory. Tried to allocate 64.00 MiB (GPU 0;
4.00 GiB total capacity; 2.71 GiB already allocated; 60.61 MiB free;
2.74 GiB reserved in total by PyTorch)
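As an aside, a small sketch of my own (not from the repo) that prints the same memory figures the error message reports, which can help confirm how close the 4 GiB card is to its limit before training starts:

# Hedged example: inspect GPU memory usage with standard PyTorch calls.
import torch

if torch.cuda.is_available():
    device = torch.device('cuda:0')
    total = torch.cuda.get_device_properties(device).total_memory
    print('total     : %.2f GiB' % (total / 1024**3))
    print('allocated : %.2f GiB' % (torch.cuda.memory_allocated(device) / 1024**3))  # "already allocated"
    print('reserved  : %.2f GiB' % (torch.cuda.memory_reserved(device) / 1024**3))   # "reserved in total by PyTorch"

Reducing --batchsize (or using the --fp16 option in the newer baseline train.py) is the usual way to fit within 4 GiB.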
Training code almost identical to yours worked, with one warning:
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x],batch_size=opt.batchsize,shuffle=True,num_workers=0, pin_memory=True) # 8 workers may work faster
Previously:
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x],
batch_size=opt.batchsize,
shuffle=True, num_workers=8, pin_memory=True) # 8 workers may work faster
Output of train.py (Person_reID_baseline_pytorch: https://github.com/layumi/Person_reID_baseline_pytorch):
Epoch 0/59
----------
C:\Users\ATUL\anaconda3\lib\site-packages\torch\optim\lr_scheduler.py:131: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
warnings.warn("Detected call of `lr_scheduler.step()` before `optimizer.step()`. "
train Loss: 3.8366 Acc: 0.2820
val Loss: 1.8972 Acc: 0.5113
Training complete in 5m 2s
...
...
....
Epoch 59/59
----------
train Loss: 0.0198 Acc: 0.9994
val Loss: 0.0063 Acc: 0.9800
Training complete in 284m 15s
Training complete in 284m 15s
train.py file (Person_reID_baseline_pytorch):
UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
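That warning is harmless, but it can be silenced by following the order PyTorch >= 1.1 expects: step the optimizer during the epoch, then step the LR scheduler once per epoch. A minimal self-contained sketch (my own, not the repo's code):

# Toy model/optimizer just to illustrate the call order.
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler

model = nn.Linear(10, 2)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
scheduler = lr_scheduler.StepLR(optimizer, step_size=40, gamma=0.1)

for epoch in range(2):
    inputs = torch.randn(4, 10)
    labels = torch.randint(0, 2, (4,))
    optimizer.zero_grad()
    loss = criterion(model(inputs), labels)
    loss.backward()
    optimizer.step()   # update the parameters first
    scheduler.step()   # then update the learning rate, once per epoch

In train.py this would mean moving scheduler.step() out of the 'train' branch at the top of each epoch and calling it after that epoch's optimizer updates. The train.py from Person_reID_baseline_pytorch being compared follows.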
# -*- coding: utf-8 -*-
from __future__ import print_function, division
import argparse
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
from torchvision import datasets, transforms
if __name__ == '__main__': torch.multiprocessing.freeze_support()
import torch.backends.cudnn as cudnn
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt
#from PIL import Image
import time
import os
from model import ft_net, ft_net_dense, ft_net_NAS, PCB
from random_erasing import RandomErasing
import yaml
import math
from shutil import copyfile
version = torch.__version__
#fp16
try:
from apex.fp16_utils import *
from apex import amp, optimizers
except ImportError: # will be 3.x series
print('This is not an error. If you want to use low precision, i.e., fp16, please install the apex with cuda support (https://github.com/NVIDIA/apex) and update pytorch to 1.0')
######################################################################
# Options
# --------
parser = argparse.ArgumentParser(description='Training')
parser.add_argument('--gpu_ids',default='0', type=str,help='gpu_ids: e.g. 0 0,1,2 0,2')
parser.add_argument('--name',default='ft_ResNet50', type=str, help='output model name')
parser.add_argument('--data_dir',default='./Market/pytorch',type=str, help='training dir path')
parser.add_argument('--train_all', action='store_true', help='use all training data' )
parser.add_argument('--color_jitter', action='store_true', help='use color jitter in training' )
parser.add_argument('--batchsize', default=32, type=int, help='batchsize')
parser.add_argument('--stride', default=2, type=int, help='stride')
parser.add_argument('--erasing_p', default=0, type=float, help='Random Erasing probability, in [0,1]')
parser.add_argument('--use_dense', action='store_true', help='use densenet121' )
parser.add_argument('--use_NAS', action='store_true', help='use NAS' )
parser.add_argument('--warm_epoch', default=0, type=int, help='the first K epoch that needs warm up')
parser.add_argument('--lr', default=0.05, type=float, help='learning rate')
parser.add_argument('--droprate', default=0.5, type=float, help='drop rate')
parser.add_argument('--PCB', action='store_true', help='use PCB+ResNet50' )
parser.add_argument('--fp16', action='store_true', help='use float16 instead of float32, which will save about 50% memory' )
opt = parser.parse_args()
fp16 = opt.fp16
data_dir = opt.data_dir
name = opt.name
str_ids = opt.gpu_ids.split(',')
gpu_ids = []
for str_id in str_ids:
gid = int(str_id)
if gid >=0:
gpu_ids.append(gid)
# set gpu ids
if len(gpu_ids)>0:
torch.cuda.set_device(gpu_ids[0])
cudnn.benchmark = True
######################################################################
# Load Data
# ---------
#
transform_train_list = [
#transforms.RandomResizedCrop(size=128, scale=(0.75,1.0), ratio=(0.75,1.3333), interpolation=3), #Image.BICUBIC)
transforms.Resize((256,128), interpolation=3),
transforms.Pad(10),
transforms.RandomCrop((256,128)),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
]
transform_val_list = [
transforms.Resize(size=(256,128),interpolation=3), #Image.BICUBIC
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
]
if opt.PCB:
transform_train_list = [
transforms.Resize((384,192), interpolation=3),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
]
transform_val_list = [
transforms.Resize(size=(384,192),interpolation=3), #Image.BICUBIC
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
]
if opt.erasing_p>0:
transform_train_list = transform_train_list + [RandomErasing(probability = opt.erasing_p, mean=[0.0, 0.0, 0.0])]
if opt.color_jitter:
transform_train_list = [transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0)] + transform_train_list
print(transform_train_list)
data_transforms = {
'train': transforms.Compose( transform_train_list ),
'val': transforms.Compose(transform_val_list),
}
train_all = ''
if opt.train_all:
train_all = '_all'
image_datasets = {}
image_datasets['train'] = datasets.ImageFolder(os.path.join(data_dir, 'train' + train_all),
data_transforms['train'])
image_datasets['val'] = datasets.ImageFolder(os.path.join(data_dir, 'val'),
data_transforms['val'])
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=opt.batchsize,
shuffle=True, num_workers=0, pin_memory=True) # 8 workers may work faster
for x in ['train', 'val']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
class_names = image_datasets['train'].classes
use_gpu = torch.cuda.is_available()
since = time.time()
inputs, classes = next(iter(dataloaders['train']))
print(time.time()-since)
######################################################################
# Training the model
# ------------------
#
# Now, let's write a general function to train a model. Here, we will
# illustrate:
#
# - Scheduling the learning rate
# - Saving the best model
#
# In the following, parameter ``scheduler`` is an LR scheduler object from
# ``torch.optim.lr_scheduler``.
y_loss = {} # loss history
y_loss['train'] = []
y_loss['val'] = []
y_err = {}
y_err['train'] = []
y_err['val'] = []
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
since = time.time()
#best_model_wts = model.state_dict()
#best_acc = 0.0
warm_up = 0.1 # We start from the 0.1*lrRate
warm_iteration = round(dataset_sizes['train']/opt.batchsize)*opt.warm_epoch # first 5 epoch
for epoch in range(num_epochs):
print('Epoch {}/{}'.format(epoch, num_epochs - 1))
print('-' * 10)
# Each epoch has a training and validation phase
for phase in ['train', 'val']:
if phase == 'train':
scheduler.step()
model.train(True) # Set model to training mode
else:
model.train(False) # Set model to evaluate mode
running_loss = 0.0
running_corrects = 0.0
# Iterate over data.
for data in dataloaders[phase]:
# get the inputs
inputs, labels = data
now_batch_size,c,h,w = inputs.shape
if now_batch_size<opt.batchsize: # skip the last batch
continue
#print(inputs.shape)
# wrap them in Variable
if use_gpu:
inputs = Variable(inputs.cuda().detach())
labels = Variable(labels.cuda().detach())
else:
inputs, labels = Variable(inputs), Variable(labels)
# if we use low precision, input also need to be fp16
#if fp16:
# inputs = inputs.half()
# zero the parameter gradients
optimizer.zero_grad()
# forward
if phase == 'val':
with torch.no_grad():
outputs = model(inputs)
else:
outputs = model(inputs)
if not opt.PCB:
_, preds = torch.max(outputs.data, 1)
loss = criterion(outputs, labels)
else:
part = {}
sm = nn.Softmax(dim=1)
num_part = 6
for i in range(num_part):
part[i] = outputs[i]
score = sm(part[0]) + sm(part[1]) +sm(part[2]) + sm(part[3]) +sm(part[4]) +sm(part[5])
_, preds = torch.max(score.data, 1)
loss = criterion(part[0], labels)
for i in range(num_part-1):
loss += criterion(part[i+1], labels)
# backward + optimize only if in training phase
if epoch<opt.warm_epoch and phase == 'train':
warm_up = min(1.0, warm_up + 0.9 / warm_iteration)
loss *= warm_up
if phase == 'train':
if fp16: # we use optimier to backward loss
with amp.scale_loss(loss, optimizer) as scaled_loss:
scaled_loss.backward()
else:
loss.backward()
optimizer.step()
# statistics
if int(version[0])>0 or int(version[2]) > 3: # for the new version like 0.4.0, 0.5.0 and 1.0.0
running_loss += loss.item() * now_batch_size
else : # for the old version like 0.3.0 and 0.3.1
running_loss += loss.data[0] * now_batch_size
running_corrects += float(torch.sum(preds == labels.data))
epoch_loss = running_loss / dataset_sizes[phase]
epoch_acc = running_corrects / dataset_sizes[phase]
print('{} Loss: {:.4f} Acc: {:.4f}'.format(
phase, epoch_loss, epoch_acc))
y_loss[phase].append(epoch_loss)
y_err[phase].append(1.0-epoch_acc)
# deep copy the model
if phase == 'val':
last_model_wts = model.state_dict()
if epoch%10 == 9:
save_network(model, epoch)
draw_curve(epoch)
time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(
time_elapsed // 60, time_elapsed % 60))
print()
time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(
time_elapsed // 60, time_elapsed % 60))
#print('Best val Acc: {:4f}'.format(best_acc))
# load best model weights
model.load_state_dict(last_model_wts)
save_network(model, 'last')
return model
######################################################################
# Draw Curve
#---------------------------
x_epoch = []
fig = plt.figure()
ax0 = fig.add_subplot(121, title="loss")
ax1 = fig.add_subplot(122, title="top1err")
def draw_curve(current_epoch):
x_epoch.append(current_epoch)
ax0.plot(x_epoch, y_loss['train'], 'bo-', label='train')
ax0.plot(x_epoch, y_loss['val'], 'ro-', label='val')
ax1.plot(x_epoch, y_err['train'], 'bo-', label='train')
ax1.plot(x_epoch, y_err['val'], 'ro-', label='val')
if current_epoch == 0:
ax0.legend()
ax1.legend()
fig.savefig( os.path.join('./model',name,'train.jpg'))
######################################################################
# Save model
#---------------------------
def save_network(network, epoch_label):
save_filename = 'net_%s.pth'% epoch_label
save_path = os.path.join('./model',name,save_filename)
torch.save(network.cpu().state_dict(), save_path)
if torch.cuda.is_available():
network.cuda(gpu_ids[0])
######################################################################
# Finetuning the convnet
# ----------------------
#
# Load a pretrainied model and reset final fully connected layer.
#
if opt.use_dense:
model = ft_net_dense(len(class_names), opt.droprate)
elif opt.use_NAS:
model = ft_net_NAS(len(class_names), opt.droprate)
else:
model = ft_net(len(class_names), opt.droprate, opt.stride)
if opt.PCB:
model = PCB(len(class_names))
opt.nclasses = len(class_names)
print(model)
if not opt.PCB:
ignored_params = list(map(id, model.classifier.parameters() ))
base_params = filter(lambda p: id(p) not in ignored_params, model.parameters())
optimizer_ft = optim.SGD([
{'params': base_params, 'lr': 0.1*opt.lr},
{'params': model.classifier.parameters(), 'lr': opt.lr}
], weight_decay=5e-4, momentum=0.9, nesterov=True)
else:
ignored_params = list(map(id, model.model.fc.parameters() ))
ignored_params += (list(map(id, model.classifier0.parameters() ))
+list(map(id, model.classifier1.parameters() ))
+list(map(id, model.classifier2.parameters() ))
+list(map(id, model.classifier3.parameters() ))
+list(map(id, model.classifier4.parameters() ))
+list(map(id, model.classifier5.parameters() ))
#+list(map(id, model.classifier6.parameters() ))
#+list(map(id, model.classifier7.parameters() ))
)
base_params = filter(lambda p: id(p) not in ignored_params, model.parameters())
optimizer_ft = optim.SGD([
{'params': base_params, 'lr': 0.1*opt.lr},
{'params': model.model.fc.parameters(), 'lr': opt.lr},
{'params': model.classifier0.parameters(), 'lr': opt.lr},
{'params': model.classifier1.parameters(), 'lr': opt.lr},
{'params': model.classifier2.parameters(), 'lr': opt.lr},
{'params': model.classifier3.parameters(), 'lr': opt.lr},
{'params': model.classifier4.parameters(), 'lr': opt.lr},
{'params': model.classifier5.parameters(), 'lr': opt.lr},
#{'params': model.classifier6.parameters(), 'lr': 0.01},
#{'params': model.classifier7.parameters(), 'lr': 0.01}
], weight_decay=5e-4, momentum=0.9, nesterov=True)
# Decay LR by a factor of 0.1 every 40 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=40, gamma=0.1)
######################################################################
# Train and evaluate
# ^^^^^^^^^^^^^^^^^^
#
# It should take around 1-2 hours on GPU.
#
dir_name = os.path.join('./model',name)
if not os.path.isdir(dir_name):
os.mkdir(dir_name)
#record every run
copyfile('./train.py', dir_name+'/train.py')
copyfile('./model.py', dir_name+'/model.py')
# save opts
with open('%s/opts.yaml'%dir_name,'w') as fp:
yaml.dump(vars(opt), fp, default_flow_style=False)
# model to gpu
model = model.cuda()
if fp16:
#model = network_to_half(model)
#optimizer_ft = FP16_Optimizer(optimizer_ft, static_loss_scale = 128.0)
model, optimizer_ft = amp.initialize(model, optimizer_ft, opt_level = "O1")
criterion = nn.CrossEntropyLoss()
model = train_model(model, criterion, optimizer_ft, exp_lr_scheduler,
num_epochs=60)
I reduced the batch_size from 32 to 16 and it worked. Can you please tell me what values I should use for batch_size and num_workers, or does it vary from system to system?
And please tell me what prerequisite knowledge is required to properly understand this code. I have only a basic understanding of ML and Python; what else do I need to learn?
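For what it's worth: it does vary from system to system. batch_size is limited mainly by GPU memory, and num_workers by CPU cores and the operating system. A rough rule-of-thumb sketch of my own (the suggested numbers are assumptions to tune empirically):

# Hypothetical starting-point heuristic for --batchsize and num_workers.
import os
import torch

if torch.cuda.is_available():
    gib = torch.cuda.get_device_properties(0).total_memory / 1024**3
    # On a 4 GiB card, a batch size of 8-16 for ft_net at 256x128 is typical;
    # halve the batch size whenever "CUDA out of memory" appears.
    suggested_batch = 16 if gib >= 4 else 8
    print('GPU memory: %.1f GiB -> try --batchsize %d' % (gib, suggested_batch))

# num_workers: usually min(CPU cores, 8); on Windows, workers > 0 also need the
# __main__ guard shown at the end of this thread, otherwise fall back to 0.
print('CPU cores:', os.cpu_count())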
I am sorry, but I really do not understand your questions or know the corresponding solutions.
Modulation rrp code ^8.
Hello,
You have done excellent and impressive work. I am new to machine learning and I was trying to run the code, but I am facing problems. I would be grateful if you could help me.
My specs: i5-8300H, GTX 1050 Ti 4 GB, 8 GB RAM; I am currently running this code on Windows 10.
I want to re-train a model using the Market-1501 dataset.
Error:
C:\Users\ATUL\Desktop\Python\Spatial-Temporal-Re-identification-master\model.py:14: UserWarning: nn.init.kaiming_normal is now deprecated in favor of nn.init.kaiming_normal_.
init.kaiming_normal(m.weight.data, a=0, mode='fan_out')
C:\Users\ATUL\Desktop\Python\Spatial-Temporal-Re-identification-master\model.py:15: UserWarning: nn.init.constant is now deprecated in favor of nn.init.constant_.
init.constant(m.bias.data, 0.0)
C:\Users\ATUL\Desktop\Python\Spatial-Temporal-Re-identification-master\model.py:17: UserWarning: nn.init.normal is now deprecated in favor of nn.init.normal_.
init.normal(m.weight.data, 1.0, 0.02)
C:\Users\ATUL\Desktop\Python\Spatial-Temporal-Re-identification-master\model.py:18: UserWarning: nn.init.constant is now deprecated in favor of nn.init.constant_.
init.constant(m.bias.data, 0.0)
C:\Users\ATUL\Desktop\Python\Spatial-Temporal-Re-identification-master\model.py:23: UserWarning: nn.init.normal is now deprecated in favor of nn.init.normal_.
init.normal(m.weight.data, std=0.001)
C:\Users\ATUL\Desktop\Python\Spatial-Temporal-Re-identification-master\model.py:24: UserWarning: nn.init.constant is now deprecated in favor of nn.init.constant_.
init.constant(m.bias.data, 0.0)
net output size:
torch.Size([8, 751])
0
[Resize(size=(288, 144), interpolation=PIL.Image.BICUBIC), RandomCrop(size=(256, 128), padding=None), RandomHorizontalFlip(p=0.5), ToTensor(), Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), <random_erasing.RandomErasing object at 0x000001CD467C78B0>]
Traceback (most recent call last):
File "", line 1, in
File "C:\Users\ATUL\Anaconda3\lib\multiprocessing\spawn.py", line 116, in spawn_main
exitcode = _main(fd, parent_sentinel)
File "C:\Users\ATUL\Anaconda3\lib\multiprocessing\spawn.py", line 125, in _main
prepare(preparation_data)
File "C:\Users\ATUL\Anaconda3\lib\multiprocessing\spawn.py", line 236, in prepare
_fixup_main_from_path(data['init_main_from_path'])
File "C:\Users\ATUL\Anaconda3\lib\multiprocessing\spawn.py", line 287, in _fixup_main_from_path
main_content = runpy.run_path(main_path,
File "C:\Users\ATUL\Anaconda3\lib\runpy.py", line 265, in run_path
return _run_module_code(code, init_globals, run_name,
File "C:\Users\ATUL\Anaconda3\lib\runpy.py", line 97, in _run_module_code
_run_code(code, mod_globals, init_globals,
File "C:\Users\ATUL\Anaconda3\lib\runpy.py", line 87, in _run_code
exec(code, run_globals)
File "C:\Users\ATUL\Desktop\Python\Spatial-Temporal-Re-identification-master\train_market.py", line 20, in
from model import ft_net, ft_net_dense, PCB
ModuleNotFoundError: No module named 'model'
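Two separate things are going on in that output. The UserWarnings from model.py are only deprecation notices and do not stop training; the underscore-suffixed, in-place init functions are the drop-in replacements. A hedged sketch of what that change would look like (the exact init helper in model.py may differ):

# Assumed shape of the init helper; only the trailing underscores change.
from torch.nn import init

def weights_init_kaiming(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1 or classname.find('Linear') != -1:
        init.kaiming_normal_(m.weight.data, a=0, mode='fan_out')  # was kaiming_normal
        init.constant_(m.bias.data, 0.0)                          # was constant
    elif classname.find('BatchNorm') != -1:
        init.normal_(m.weight.data, 1.0, 0.02)                    # was normal
        init.constant_(m.bias.data, 0.0)

The actual failure is the ModuleNotFoundError raised inside multiprocessing's spawn machinery. On Windows, DataLoader workers are started with the spawn method, which re-imports the training script in a fresh interpreter; the usual remedies are to run the script from the repository folder (so model.py sits next to it and can be imported), keep the executable part of the script under an if __name__ == '__main__': guard, or simply set num_workers=0 as discussed above. A structural sketch (hypothetical, not the repo's train_market.py):

# Stand-in dataset so this sketch runs anywhere; in the real script the
# module-level imports (from model import ...) stay at the top and the
# dataset/dataloader/training code moves into main().
import torch
from torchvision import datasets, transforms

def main():
    dataset = datasets.FakeData(size=64, transform=transforms.ToTensor())
    loader = torch.utils.data.DataLoader(dataset, batch_size=16,
                                         shuffle=True, num_workers=4)
    images, labels = next(iter(loader))   # workers spawn here without crashing
    print(images.shape)

if __name__ == '__main__':
    main()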