-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathstyle_transfer.py
144 lines (110 loc) · 3.7 KB
/
style_transfer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import cv2
import sys
import shutil
import os
import numpy as np
import scipy.ndimage.filters
from functools import partial
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
import torchvision.transforms as transforms
from torch.autograd import Variable
from helpers import *
from constants import *
class SaveFeatures(nn.Module):
    """Record the most recent forward-pass output of a module.

    A forward hook is registered on the wrapped module ``m``; every time
    ``m`` runs, its output is stored in ``self.features``. Call ``close()``
    to detach the hook once the outputs are no longer needed.
    """

    # Latest output captured from the hooked module; remains None until the
    # hooked module has been run at least once.
    features = None

    def __init__(self, m):
        # nn.Module must be initialised before use, otherwise inherited
        # Module APIs (parameters(), repr(), state_dict(), ...) raise
        # AttributeError on this object.
        super().__init__()
        self.hook = m.register_forward_hook(self.hook_fn)

    def hook_fn(self, module, input, output):
        """Forward-hook callback: keep a reference to ``output``."""
        self.features = output

    def close(self):
        """Remove the forward hook from the wrapped module."""
        self.hook.remove()
def get_images(idx):
    """Load the content/style image pair numbered ``idx`` from IMG_DIR.

    Reads ``content_<idx>.png`` and ``style_<idx>.png`` with ``read_img`` and
    moves the last axis of each array to the front (HWC -> CHW, assuming
    ``read_img`` returns height x width x channels -- TODO confirm).

    Returns:
        Tuple ``(content, style)`` of the two transposed arrays.
    """
    content_path = os.path.join(IMG_DIR, 'content_{}.png'.format(idx))
    style_path = os.path.join(IMG_DIR, 'style_{}.png'.format(idx))

    content_arr = read_img(content_path)
    style_arr = read_img(style_path)

    # Axis order (2, 0, 1): last axis first, remaining axes keep their order.
    axes = (2, 0, 1)
    return np.transpose(content_arr, axes), np.transpose(style_arr, axes)
def content_loss(yhat):
    """Mean, over layers, of the MSE between stored and candidate features.

    Args:
        yhat: sequence of feature tensors for the candidate image; entry ``k``
            is compared with the module-level ``cnt_features[k]``.

    Returns:
        Sum of the per-layer ``F.mse_loss`` values divided by ``len(yhat)``.
    """
    layer_count = len(yhat)
    summed = sum(
        F.mse_loss(cnt_features[k], candidate)
        for k, candidate in enumerate(yhat)
    )
    return summed / layer_count
def gram(x):
    """Return the (unnormalised) Gram matrix of a 4-D feature tensor.

    ``x`` of size ``(b, c, h, w)`` is flattened to ``(b*c, h*w)`` and
    multiplied by its own transpose, giving a ``(b*c, b*c)`` matrix.
    No division by the number of elements is applied.
    """
    batch, channels, _height, _width = x.size()
    rows = x.view(batch * channels, -1)
    return rows.mm(rows.t())
def style_loss(yhat):
    """Mean, over layers, of the MSE between style and candidate Gram matrices.

    Args:
        yhat: sequence of feature tensors for the candidate image; entry ``k``
            is compared with the module-level ``sty_features[k]``.

    Returns:
        Sum of the per-layer ``F.mse_loss`` values between ``gram`` of the
        stored style features and ``gram`` of the candidate features,
        divided by ``len(yhat)``.
    """
    total = 0
    for layer_idx, candidate in enumerate(yhat):
        target_gram = gram(sty_features[layer_idx])
        candidate_gram = gram(candidate)
        total = total + F.mse_loss(target_gram, candidate_gram)
    return total / len(yhat)
def step():
    """Optimizer closure: evaluate the total loss and back-propagate it.

    Runs the module-level ``vgg`` on the image being optimised (``ip``),
    collects the hooked activations, combines the weighted content and style
    losses, and calls ``backward()``. Every 100th call it prints the losses
    and writes the current image to ``debug/<i>.png``. The module-level
    counter ``i`` is incremented on every call.

    Returns:
        The total loss tensor (weighted content loss + weighted style loss).
    """
    global i
    c_w = 0.001  # content-loss weight
    s_w = 0.001  # style-loss weight

    # The forward pass refreshes sf.features on every registered hook.
    vgg(ip)
    cnt_ip_features = [sf.features.clone() for sf in cnt_sfs]
    sty_ip_features = [sf.features.clone() for sf in sty_sfs]

    cnt_loss = c_w * content_loss(cnt_ip_features)
    sty_loss = s_w * style_loss(sty_ip_features)
    loss = cnt_loss + sty_loss

    optimizer.zero_grad()
    loss.backward()

    if i % 100 == 0:
        # .item() reads the scalar out of a 0-dim tensor; indexing such a
        # tensor with [0] raises IndexError on current PyTorch releases.
        print('Step - {}, Content loss - {}, Style loss - {}, Total Loss - {}'
              .format(i, cnt_loss.item(), sty_loss.item(), loss.item()))
        # Drop the batch axis and move channels last before writing to disk.
        out_img = ip.data.cpu().squeeze().permute(1, 2, 0).numpy()
        cv2.imwrite(os.path.join('debug', str(i) + '.png'), out_img * 255)
    i += 1
    return loss
if __name__ == '__main__':
    # Script entry point: load one content/style pair, capture target
    # activations from a frozen pretrained network, then optimise a noise
    # image with L-BFGS so that its activations match those targets.
    # IMG_DIR, IDX, MODEL and STEPS are not defined in this file; presumably
    # they come from the star-imports at the top -- verify.

    # Use the first GPU when available; otherwise fall back to the CPU so the
    # script still runs (slowly) on machines without CUDA.
    gpu0 = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Start each run with an empty debug/ directory for intermediate images.
    if os.path.exists('debug'):
        shutil.rmtree('debug')
    os.makedirs('debug')

    # NOTE(review): MAX is not used anywhere in the code visible here.
    MAX = len(os.listdir(IMG_DIR))/2

    print('Reading content, style images...')
    np_cnt, np_style = get_images(IDX)

    # NOTE(review): this transform is constructed but never applied to the
    # images in this script.
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    print('Loading {} model...'.format(MODEL))
    model = getattr(models, MODEL)
    vgg = model(pretrained=True)
    cnt, style = torch.Tensor(np_cnt), torch.Tensor(np_style)

    # Keep only the first 43 modules of the feature extractor (the classifier
    # is dropped) and freeze their weights.
    vgg = nn.Sequential(*list(vgg.features.children())[:43]).to(gpu0)
    for p in vgg.parameters():
        p.requires_grad = False
    # Inference mode: if the chosen model contains batch-norm layers they must
    # use their stored running statistics, so that content, style and
    # candidate activations are computed by the same fixed function.
    vgg.eval()

    # Register forward hooks for the content layers, run the content image
    # through the network and keep copies of the captured activations.
    layers = [5, 12, 22]
    cnt_sfs = [SaveFeatures(vgg[i]) for i in layers]
    with torch.no_grad():
        vgg(cnt[None].to(gpu0))
    cnt_features = [sf.features.clone() for sf in cnt_sfs]

    # Same for the style layers and the style image.
    layers = [5, 12, 22, 32, 42]
    sty_sfs = [SaveFeatures(vgg[i]) for i in layers]
    with torch.no_grad():
        vgg(style[None].to(gpu0))
    sty_features = [sf.features.clone() for sf in sty_sfs]

    # Start from median-filtered uniform noise with the content image's shape
    # and make it the tensor being optimised.
    np_ip = np.random.uniform(0.0, 1.0, size=np_cnt.shape)
    # NOTE(review): get_images moves the last axis to the front, so if the
    # arrays are channels-first here, a window of [8, 8, 1] filters across
    # the channel axis; [1, 8, 8] may have been intended -- confirm.
    np_ip = scipy.ndimage.median_filter(np_ip, [8, 8, 1])
    ip = torch.Tensor(np_ip)[None].to(gpu0)
    ip.requires_grad_(True)

    optimizer = optim.LBFGS([ip], lr=0.005)

    print('\nTraining...')
    # step() increments i each time the optimizer evaluates the closure.
    i = 0
    while i < STEPS:
        optimizer.step(step)