# train.py
import torch as ch
import numpy as np
from tqdm.auto import tqdm
from pathlib import Path
import sys
assert sys.version_info.major == 3 and sys.version_info.minor >= 9, 'need python 3.9+'
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
import torchvision.transforms as transforms

# set this to the root of your Places365 download as a pathlib.Path,
# e.g. Path('/datasets/places365')
PLACES_DATASET = None
if not PLACES_DATASET:
    raise NotImplementedError('Need to set PLACES_DATASET to the path of the Places365 dataset')

LR = 0.1           # learning rate
EPOCHS = 2         # number of training epochs
WD = 0             # weight decay
BS = 256           # batch size
NUM_CLASSES = 365  # Places365 has 365 scene categories

# set this to 1 if you are debugging the script
DEBUG_MODE = 0
if DEBUG_MODE:
    EPOCHS = 1

def make_model():
    model = ch.hub.load('pytorch/vision:v0.10.0', 'resnet18', pretrained=True)
    # swap the classification head for one with the right number of outputs
    in_features = model.fc.in_features
    model.fc = ch.nn.Linear(in_features, NUM_CLASSES)
    # move to the GPU and use the channels-last memory layout
    model = model.cuda().to(memory_format=ch.channels_last)
    return model

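# a quick smoke test for make_model (an illustrative sketch, not part of the
# assignment; assumes a CUDA device is available). Uncomment to try it:
#
#   model = make_model()
#   dummy = ch.randn(2, 3, 224, 224, device='cuda').to(memory_format=ch.channels_last)
#   assert model(dummy).shape == (2, NUM_CLASSES)
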
# standard ImageNet normalization statistics
IMAGENET_MEAN = np.array([0.485, 0.456, 0.406])
IMAGENET_STD = np.array([0.229, 0.224, 0.225])

def data_postprocess(x, y):
    # move the batch to the GPU, matching the model's channels-last layout
    x = x.to(device='cuda', non_blocking=True)
    x = x.to(memory_format=ch.channels_last, non_blocking=True)
    y = y.to(device='cuda', non_blocking=True)
    return x, y

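# note: non_blocking=True only overlaps host-to-device copies with compute when
# the source tensors live in pinned (page-locked) memory, i.e. when the
# DataLoader is constructed with pin_memory=True
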
def data_checks(x, y):
    # sanity checks: expected shapes and dtypes, and tensors already on the GPU
    assert x.shape == (BS, 3, 224, 224), x.shape
    assert y.shape == (BS,) and y.dtype == ch.int64, (y.shape, y.dtype)
    assert x.device != ch.device('cpu'), x.device
    assert y.device != ch.device('cpu'), y.device

def get_target_transforms():
    # the train and val ImageFolders may assign different indices to the same
    # class name, so remap val labels onto the train split's class indices
    train_ds = ImageFolder(root=PLACES_DATASET / 'train')
    val_ds = ImageFolder(root=PLACES_DATASET / 'val')

    def train_target_transform(x):
        return x

    def val_target_transform(x):
        curr_label = val_ds.classes[x]
        return train_ds.class_to_idx[curr_label]

    return train_target_transform, val_target_transform

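# illustration of the remap above (hypothetical indices): if class 'abbey' has
# index 3 in the val split but index 0 in the train split, then
# val_target_transform(3) returns 0, matching the model's train-indexed outputs
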
def make_fast_loaders():
    # TODO: implement with the data loading framework assigned to you
    # train_loader = ...
    # val_loader = ...
    # return train_loader, val_loader
    raise NotImplementedError()

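# A minimal sketch of what make_fast_loaders could look like using only the
# plain PyTorch DataLoader (pinned memory, persistent workers, prefetching).
# This is an assumption for illustration; the data loading framework assigned
# to you (e.g. FFCV, DALI, or WebDataset) has its own API.
def make_fast_loaders_sketch():
    # reuse the datasets (and their transforms) from make_loaders
    slow_train, slow_val = make_loaders()
    train_loader = DataLoader(slow_train.dataset, batch_size=BS, shuffle=True,
                              drop_last=True, num_workers=5, pin_memory=True,
                              persistent_workers=True, prefetch_factor=4)
    val_loader = DataLoader(slow_val.dataset, batch_size=BS, shuffle=False,
                            drop_last=False, num_workers=5, pin_memory=True,
                            persistent_workers=True, prefetch_factor=4)
    return train_loader, val_loader
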
def make_loaders():
    normalize = transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)
    tr_target_transform, val_target_transform = get_target_transforms()
    train_ds = ImageFolder(
        root=PLACES_DATASET / 'train',
        transform=transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]),
        target_transform=tr_target_transform)
    val_ds = ImageFolder(
        root=PLACES_DATASET / 'val',
        transform=transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ]),
        target_transform=val_target_transform)
    train_loader = DataLoader(train_ds, batch_size=BS, shuffle=True,
                              drop_last=True, num_workers=5)
    val_loader = DataLoader(val_ds, batch_size=BS, shuffle=False,
                            drop_last=False, num_workers=5)
    return train_loader, val_loader

def main():
    '''
    Instructions:
    - Modify the following code to finetune on Places365 using a GPU and the
      data loading framework assigned to you. While you can modify all of the
      code as you see fit, you should only have to modify (a) the
      `make_loaders` function and (b) the training loops.
    '''
    model = make_model()
    # only the new classification head is finetuned
    model_params = model.fc.parameters()
    optimizer = ch.optim.SGD(model_params, lr=LR, weight_decay=WD)
    losser = ch.nn.CrossEntropyLoss()
    # this would train with the standard pytorch dataloader:
    # train_loader, val_loader = make_loaders()
    # TODO: fix this so that the script uses the proper dataloader
    train_loader, val_loader = make_fast_loaders()
    # mixed-precision training
    scaler = ch.cuda.amp.GradScaler()
    # scheduler: linear learning-rate decay from LR down to 0 over the run
    num_iterations = len(train_loader) * EPOCHS
    scheduler = ch.optim.lr_scheduler.LinearLR(optimizer, start_factor=1,
                                               end_factor=0, last_epoch=-1,
                                               total_iters=num_iterations)
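    # with start_factor=1 and end_factor=0, the learning rate at step k is
    # LR * (1 - k / num_iterations), e.g. half the base LR halfway through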
    # train and then evaluate, once per epoch
    for epoch in range(EPOCHS):
        # first, train for one epoch
        model.train()
        assert train_loader is not None, 'need to define train loader!'
        for iteration, (x, y) in enumerate(tqdm(train_loader)):
            x, y = data_postprocess(x, y)
            # you should not have to touch anything after this
            with ch.cuda.amp.autocast():
                data_checks(x, y)  # you should not get an error here!
                out = model(x)
                loss = losser(out, y)
            if iteration % 10 == 0:
                print(f'loss: {loss.item():.4f}')
            optimizer.zero_grad()
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            # update the loss scale before advancing the LR schedule
            scaler.update()
            scheduler.step()
        # then evaluate on the val split
        model.eval()
        all_corrects = []
        assert val_loader is not None, 'need to define val loader!'
        with ch.no_grad():  # no gradients needed during evaluation
            for iteration, (x, y) in enumerate(tqdm(val_loader)):
                x, y = data_postprocess(x, y)
                # you should not have to touch anything after this
                out = model(x)
                corrects = ch.argmax(out, dim=1) == y
                all_corrects.append(corrects)
        # all_corrects holds bools; cast to floats and take the mean for accuracy
        acc1 = ch.cat(all_corrects).float().mean()
        print(f'Accuracy @ epoch {epoch}: {acc1.item():.4f}')

if __name__ == '__main__':
    main()
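
# expected usage (an assumption based on the script layout): set PLACES_DATASET
# above, implement make_fast_loaders, then run `python train.py` on a machine
# with a CUDA GPU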