Commit

[update] add mobilenetv3

myatmyintzuthin committed Nov 3, 2022
1 parent ed55226 commit beda720
Showing 8 changed files with 174 additions and 47 deletions.
3 changes: 2 additions & 1 deletion README.md
@@ -9,7 +9,7 @@ Ramen :ramen: , Sashimi :fish: , Sushi :sushi: , Takoyaki :dango:
| ----------- | ----------- |
| VGG | 11, 13, 16, 19 |
| Resnet | 18, 34, 50, 101, 152 |
-| Mobilenet | v2 |
+| Mobilenet | v2, v3-small, v3-large |

## 1. Dependencies

@@ -64,6 +64,7 @@ options:
--eval : To evaluate the model without training again
--eval_model : model path for evaluation
```
+The results of each training run are saved in the `experiments/{model_name}{datetime}/` folder.
## 6. Inference
For inference, run:
```bash
10 changes: 5 additions & 5 deletions config.yaml
@@ -4,18 +4,18 @@ dataset:
class_name: ['ramen', 'sashimi', 'sushi','takoyaki']

model:
-name: 'resnet'
-variant : '50'
+name: 'mobilenet'
+variant : 'v3-large'
width_multi: 1.0

train:
epochs: 10
batch_size: 8
num_worker: 4
lr: 0.001
-model_path: 'resnet50_food.pt'
-pretrained_path: 'pretrained_weights/resnet50.pt'
+model_path: 'mobilenetv3L_food.pt'
+pretrained_path: 'pretrained_weights/mobilenetv3-large.pt'
save_dir: 'experiments'

test:
-model_path: 'experiments/resnet50_202210311348/resnet50_food.pt'
+model_path: 'experiments/mobilenetv3-large_202211030833/mobilenetv3L_food.pt'
8 changes: 6 additions & 2 deletions convert.py
@@ -6,6 +6,7 @@
from models.resnet import ResNet, ResidualBlock, ResBottleneckBlock
from models.vgg import VGG
from models.mobilenetv2 import MobileNetV2
+from models.mobilenetv3 import MobileNetV3

class ConvertModel:
def __init__(self, model: str, variant: str, width_multi: float, num_class: int) -> None:
@@ -27,7 +28,10 @@ def load_model(self):
model = ResNet(ResBottleneckBlock, resnet_config[str(self.variant)]['repeat'], useBottleneck=True, num_class=self.num_class)
if self.model == 'mobilenet':
if self.variant == 'v2':
-model = MobileNetV2(self.num_class, self.width_mutli)
+model = MobileNetV2(mobilenet_config[str(self.variant)]['cfg'], self.num_class, self.width_mutli)
+else:
+model = MobileNetV3(mobilenet_config[str(self.variant)]['cfg'], self.num_class)

return model

def initialize_weights(self):
@@ -46,7 +50,7 @@ def initialize_weights(self):
pretrained_model = mobilenet_config[str(self.variant)]['torch_model'](pretrained = True)

pretrained_state_dict = pretrained_model.state_dict()

for my, pre in zip(my_state_dict.keys(), pretrained_state_dict.keys()):
my_state_dict[my] = pretrained_state_dict[pre]

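The weight-transfer loop above copies tensors purely by position: `zip` pairs the two state dicts' keys in registration order. A minimal sketch of that assumption made explicit (illustrative, not part of the commit; the shape check is an addition):

```python
# Sketch: positional state-dict transfer. This only works when both models
# register parameters in the same order with matching shapes, so a shape
# assertion guards against silent misalignment.
def transfer_by_position(my_state_dict, pretrained_state_dict):
    for my, pre in zip(my_state_dict.keys(), pretrained_state_dict.keys()):
        assert my_state_dict[my].shape == pretrained_state_dict[pre].shape, \
            f'shape mismatch: {my} vs {pre}'
        my_state_dict[my] = pretrained_state_dict[pre]
    return my_state_dict
```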
56 changes: 42 additions & 14 deletions models/blocks.py
@@ -1,10 +1,14 @@
import numpy as np
import torch.nn as nn
import torch

def activation(act: str):
act_list = {
'relu': nn.ReLU,
-'relu6': nn.ReLU6
+'relu6': nn.ReLU6,
+'hsigmoid': nn.Hardsigmoid,
+'h-swish': nn.Hardswish,
+'silu': nn.SiLU
}
act = act.lower()

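The registry maps names to activation classes rather than instances, so callers instantiate the result themselves, e.g. `activation(act)(inplace=True)`. A quick sketch of one of the new entries (illustrative, not part of the commit):

```python
# Illustrative only: look up a newly registered activation and apply it.
import torch
from models.blocks import activation

hswish = activation('h-swish')(inplace=True)   # resolves to nn.Hardswish
print(hswish(torch.randn(2, 4)).shape)         # torch.Size([2, 4])
```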
@@ -17,44 +21,68 @@ def make_divisible(x, divisible_by=8):
return int(np.ceil(x * 1. / divisible_by)*divisible_by)

class ConvBnAct(nn.Module):
-def __init__(self, in_channel, out_channel, kernel_size, stride, padding, bias, act, groups=1, num_feat=0) -> None:
+def __init__(self, in_channel, out_channel, kernel_size, stride=1, padding=0, bias=False, act='relu', groups=1) -> None:
super(ConvBnAct, self).__init__()

self.conv = nn.Conv2d(in_channels=in_channel, out_channels=out_channel, kernel_size=kernel_size, stride=stride, padding=padding, groups=groups, bias=bias)
-self.bn = nn.BatchNorm2d(num_features=num_feat)
-self.relu = activation(act)(inplace=True)
+self.bn = nn.BatchNorm2d(num_features=out_channel)
+self.act = activation(act)(inplace=True)

def forward(self,x):
-return self.relu(self.bn(self.conv(x)))
+return self.act(self.bn(self.conv(x)))

class ConvAct(nn.Module):
-def __init__(self, in_channel, out_channel, kernel_size, stride, padding, bias, act, groups=1) -> None:
+def __init__(self, in_channel, out_channel, kernel_size, stride=1, padding=0, bias=False, act='relu', groups=1) -> None:
super(ConvAct, self).__init__()

self.conv = nn.Conv2d(in_channels=in_channel, out_channels=out_channel, kernel_size= kernel_size,stride=stride, padding=padding, groups=groups, bias=bias)
-self.relu = activation(act)(inplace=True)
+self.act = activation(act)(inplace=True)

def forward(self,x):
-return self.relu(self.conv(x))
+return self.act(self.conv(x))

class ConvBn(nn.Module):
-def __init__(self, in_channel, out_channel, kernel_size, stride, padding, bias, groups=1, num_feat=0) -> None:
+def __init__(self, in_channel, out_channel, kernel_size, stride=1, padding=0, bias=False, groups=1) -> None:
super(ConvBn, self).__init__()

self.conv = nn.Conv2d(in_channels=in_channel, out_channels=out_channel, kernel_size= kernel_size,stride=stride, padding=padding, groups=groups, bias=bias)
-self.bn = nn.BatchNorm2d(num_features=num_feat)
+self.bn = nn.BatchNorm2d(num_features=out_channel)

def forward(self,x):
return self.bn(self.conv(x))

class DepthWiseConv(nn.Module):
-def __init__(self, in_channel, out_channel, kernel_size, stride, padding, bias, act, groups=1, num_feat=0) -> None:
+def __init__(self, in_channel, out_channel, kernel_size, stride, padding, bias, act, groups=1) -> None:
super(DepthWiseConv, self).__init__()

# dw
-self.dw_conv = ConvBnAct(in_channel, in_channel, kernel_size, stride, padding, bias, act, groups, num_feat)
+self.dw_conv = ConvBnAct(in_channel, in_channel, kernel_size, stride, padding, bias, act, groups)
# pw
-self.pw_conv = ConvBn(in_channel, out_channel, 1, 1, 0, bias, 1, out_channel)
+self.pw_conv = ConvBn(in_channel, out_channel, 1, 1, 0, bias, 1)

def forward(self,x):
return self.pw_conv(self.dw_conv(x))

+class SEBlock(nn.Module):
+def __init__(self, in_channel, squeeze_channel):
+super(SEBlock, self).__init__()
+
+self.globpool = nn.AdaptiveAvgPool2d((1,1))
+self.fc1 = nn.Conv2d(in_channel, squeeze_channel, 1)
+self.fc2 = nn.Conv2d(squeeze_channel, in_channel, 1)
+self.relu = activation('relu')(inplace=True)
+self.hsigmoid = activation('hsigmoid')(inplace=True)
+
+def scale(self, input):
+
+x = self.globpool(input)
+x = self.fc1(x)
+x = self.relu(x)
+x = self.fc2(x)
+x = self.hsigmoid(x)
+return x
+
+def forward(self, input):
+
+scale = self.scale(input)
+return scale * input
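A minimal usage sketch for the new `SEBlock` (illustrative channel sizes, not part of the commit): it squeezes the spatial dims to 1x1, bottlenecks the channels, and rescales its input, so the output shape matches the input.

```python
# Illustrative sketch: squeeze-and-excitation over a dummy feature map.
import torch
from models.blocks import SEBlock, make_divisible

exp_size = 96
se = SEBlock(exp_size, make_divisible(exp_size // 4, 8))  # squeeze to 24 channels
x = torch.randn(1, exp_size, 14, 14)
print(se(x).shape)   # torch.Size([1, 96, 14, 14]) -- same shape, re-weighted
```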
25 changes: 8 additions & 17 deletions models/mobilenetv2.py
@@ -11,14 +11,13 @@ def __init__(self, in_channel, out_channel, stride, expansion) -> None:
self.skip_connection = self.stride == 1 and in_channel == out_channel

if expansion == 1:
-self.conv = blocks.DepthWiseConv(hidden_dim, out_channel, kernel_size=3, stride=stride, padding=1,bias=False, act='relu6', groups=hidden_dim,num_feat=hidden_dim)
+self.conv = blocks.DepthWiseConv(hidden_dim, out_channel, kernel_size=3, stride=stride, padding=1, bias=False, act='relu6', groups=hidden_dim)

else:
self.conv = nn.Sequential(
-#pw
-blocks.ConvBnAct(in_channel, hidden_dim, kernel_size=1, stride=1, padding=0, bias=False, act='relu6', groups=1, num_feat=hidden_dim),
-#dw
-blocks.DepthWiseConv(hidden_dim, out_channel, kernel_size=3, stride=stride, padding=1,bias=False, act='relu6', groups=hidden_dim,num_feat=hidden_dim)
+blocks.ConvBnAct(in_channel, hidden_dim, kernel_size=1, stride=1, padding=0, bias=False, act='relu6', groups=1),
+
+blocks.DepthWiseConv(hidden_dim, out_channel, kernel_size=3, stride=stride, padding=1, bias=False, act='relu6', groups=hidden_dim)
)

def forward(self, x):
@@ -29,25 +28,17 @@ def forward(self, x):
return self.conv(x)

class MobileNetV2(nn.Module):
-def __init__(self, num_class, width_multi) -> None:
+def __init__(self, model_cfg, num_class, width_multi) -> None:
super(MobileNetV2, self).__init__()

bottle_neck = InvertedResidual
in_channel = 32
last_channel = 1280
-self.cfgs = [
-[1, 16, 1, 1],
-[6, 24, 2, 2],
-[6, 32, 3, 2],
-[6, 64, 4, 2],
-[6, 96, 3, 1],
-[6, 160, 3, 2],
-[6, 320, 1, 1]
-]
+self.cfgs = model_cfg

self.last_channel = blocks.make_divisible(last_channel * width_multi) if width_multi > 1.0 else last_channel
self.layers = [
-blocks.ConvBnAct(3, in_channel, kernel_size=3, stride=2, padding=1, bias=False, act='relu6', groups=1, num_feat=in_channel)
+blocks.ConvBnAct(3, in_channel, kernel_size=3, stride=2, padding=1, bias=False, act='relu6', groups=1)
]

for t,c,n,s in self.cfgs:
Expand All @@ -57,7 +48,7 @@ def __init__(self, num_class, width_multi) -> None:
in_channel = out_channel

self.layers.append(
-blocks.ConvBnAct(in_channel, self.last_channel, kernel_size=1, stride=1, padding=0, bias=False, act='relu6', groups=1, num_feat=self.last_channel)
+blocks.ConvBnAct(in_channel, self.last_channel, kernel_size=1, stride=1, padding=0, bias=False, act='relu6', groups=1)
)
self.layers.append(nn.AdaptiveAvgPool2d((1,1)))

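With the hard-coded inverted-residual table moved into `model_config`, construction now takes the config explicitly. A sketch (not part of the commit, assuming the classifier head ends in `num_class` logits):

```python
# Sketch: build MobileNetV2 from the shared config table.
import torch
from models.mobilenetv2 import MobileNetV2
from models.model_config import mobilenet_config

model = MobileNetV2(mobilenet_config['v2']['cfg'], num_class=4, width_multi=1.0)
print(model(torch.randn(1, 3, 224, 224)).shape)   # expected: torch.Size([1, 4])
```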
63 changes: 63 additions & 0 deletions models/mobilenetv3.py
@@ -0,0 +1,63 @@
import torch
import torch.nn as nn
import models.blocks as blocks

class BottleNeck(nn.Module):
def __init__(self, in_channel, out_channel, kernel_size, exp_size, se, act, stride) -> None:
super(BottleNeck, self).__init__()

self.skip_connection = in_channel == out_channel and stride == 1
self.squeeze_channel = blocks.make_divisible(exp_size//4, 8)
self.block = nn.Sequential(
blocks.ConvBnAct(in_channel, exp_size, kernel_size=1, stride=1, padding=0, bias=False, act=act) if exp_size != in_channel else nn.Identity(),
blocks.ConvBnAct(exp_size, exp_size, kernel_size=kernel_size, stride=stride, padding=kernel_size//2, act=act, groups=exp_size),
blocks.SEBlock(exp_size, self.squeeze_channel) if se else nn.Identity(),
blocks.ConvBn(exp_size, out_channel, kernel_size=1, stride=1, padding=0)
)

def forward(self, x):
res = self.block(x)
if self.skip_connection:
res += x
return res

class MobileNetV3(nn.Module):
def __init__(self, model_cfg, num_class) -> None:
super(MobileNetV3, self).__init__()

self.config = model_cfg
# first conv layer
self.conv = blocks.ConvBnAct(in_channel=3, out_channel=16, kernel_size=3, stride=2, padding=1, act='h-swish')
self.layers = []
# bottleneck layers
for c in self.config:
kernel_size, exp_size, in_channel, out_channel, se, nl, s = c
act = 'relu' if nl=='RE' else 'h-swish'
self.layers.append(BottleNeck(in_channel, out_channel, kernel_size, exp_size, se, act, s))

last_out_channel = self.config[-1][3]
last_exp = self.config[-1][1]
out = 1024 if last_exp == 576 else 1280
self.layers.append(
blocks.ConvBnAct(last_out_channel, last_exp, kernel_size=1,stride=1, act='h-swish')
)
self.layers = nn.Sequential(*self.layers)

self.avgpool = nn.AdaptiveAvgPool2d((1,1))
# classifier
self.classifier = nn.Sequential(
nn.Linear(last_exp, out),
blocks.activation('h-swish')(inplace=True),
nn.Dropout(0.8),
nn.Linear(out, num_class)
)

def forward(self, x):
x = self.conv(x)
for layer in self.layers:
x = layer(x)
x = self.avgpool(x)
x = torch.flatten(x,1)
x = self.classifier(x)
return x
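A sketch of how the new class is wired together with the config table (illustrative, not part of the commit):

```python
# Sketch: instantiate v3-large from models/model_config.py and run a
# dummy forward pass; num_class=4 matches this repo's four food classes.
import torch
from models.mobilenetv3 import MobileNetV3
from models.model_config import mobilenet_config

model = MobileNetV3(mobilenet_config['v3-large']['cfg'], num_class=4)
logits = model(torch.randn(1, 3, 224, 224))
print(logits.shape)   # torch.Size([1, 4])
```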

42 changes: 41 additions & 1 deletion models/model_config.py
@@ -14,5 +14,45 @@
'19': {'repeat':[2, 2, 4, 4, 4],'torch_model': models.vgg19}
}
mobilenet_config = {
-'v2': {'torch_model': models.mobilenet_v2}
+'v2': {'cfg': [
+# t, c, n, s = expansion, out-channel, repeat, stride
+[1, 16, 1, 1],
+[6, 24, 2, 2],
+[6, 32, 3, 2],
+[6, 64, 4, 2],
+[6, 96, 3, 1],
+[6, 160, 3, 2],
+[6, 320, 1, 1]
+], 'torch_model': models.mobilenet_v2},
+'v3-small': {'cfg':[
+# [kernel, exp size, in_channels, out_channels, SEBlock(SE), activation (NL: 'RE' = ReLU, 'HE' = h-swish), stride(s)]
+[3, 16, 16, 16, True, 'RE', 2],
+[3, 72, 16, 24, False, 'RE', 2],
+[3, 88, 24, 24, False, 'RE', 1],
+[5, 96, 24, 40, True, 'HE', 2],
+[5, 240, 40, 40, True, 'HE', 1],
+[5, 240, 40, 40, True, 'HE', 1],
+[5, 120, 40, 48, True, 'HE', 1],
+[5, 144, 48, 48, True, 'HE', 1],
+[5, 288, 48, 96, True, 'HE', 2],
+[5, 576, 96, 96, True, 'HE', 1],
+[5, 576, 96, 96, True, 'HE', 1]
+], 'torch_model': models.mobilenet_v3_small},
+'v3-large': {'cfg':[
+[3, 16, 16, 16, False, 'RE', 1],
+[3, 64, 16, 24, False, 'RE', 2],
+[3, 72, 24, 24, False, 'RE', 1],
+[5, 72, 24, 40, True, 'RE', 2],
+[5, 120, 40, 40, True, 'RE', 1],
+[5, 120, 40, 40, True, 'RE', 1],
+[3, 240, 40, 80, False, 'HE', 2],
+[3, 200, 80, 80, False, 'HE', 1],
+[3, 184, 80, 80, False, 'HE', 1],
+[3, 184, 80, 80, False, 'HE', 1],
+[3, 480, 80, 112, True, 'HE', 1],
+[3, 672, 112, 112, True, 'HE', 1],
+[5, 672, 112, 160, True, 'HE', 2],
+[5, 960, 160, 160, True, 'HE', 1],
+[5, 960, 160, 160, True, 'HE', 1]
+], 'torch_model': models.mobilenet_v3_large}
}
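Each config row feeds `BottleNeck` positionally; unpacking the first v3-large row shows the mapping (illustrative, not part of the commit):

```python
# Sketch: how MobileNetV3.__init__ consumes one config row.
from models.model_config import mobilenet_config

kernel, exp, in_ch, out_ch, se, nl, stride = mobilenet_config['v3-large']['cfg'][0]
print(kernel, exp, in_ch, out_ch, se, nl, stride)
# 3 16 16 16 False RE 1 -> 3x3 depthwise, no expansion (exp == in), no SE, ReLU, stride 1
```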
14 changes: 7 additions & 7 deletions models/resnet.py
@@ -6,11 +6,11 @@ class ResidualBlock(nn.Module):
def __init__(self, in_channel, out_channel, downsample):
super(ResidualBlock, self).__init__()

-self.conv1 = blocks.ConvBnAct(in_channel=in_channel, out_channel=out_channel, kernel_size=3, stride=2 if downsample else 1, padding=1, bias=False, act='relu', groups=1, num_feat=out_channel)
-self.conv2 = blocks.ConvBn(in_channel=out_channel, out_channel=out_channel, kernel_size=3, stride=1, padding=1, bias=False, groups=1, num_feat=out_channel)
+self.conv1 = blocks.ConvBnAct(in_channel=in_channel, out_channel=out_channel, kernel_size=3, stride=2 if downsample else 1, padding=1, bias=False, act='relu', groups=1)
+self.conv2 = blocks.ConvBn(in_channel=out_channel, out_channel=out_channel, kernel_size=3, stride=1, padding=1, bias=False, groups=1)

if downsample:
-self.identity = blocks.ConvBn(in_channel=in_channel, out_channel=out_channel, kernel_size=1, stride=2, padding=0, bias=False, groups=1, num_feat=out_channel)
+self.identity = blocks.ConvBn(in_channel=in_channel, out_channel=out_channel, kernel_size=1, stride=2, padding=0, bias=False, groups=1)
else:
self.identity = nn.Sequential()

@@ -30,12 +30,12 @@ def __init__(self, in_channel, out_channel, downsample) -> None:
super().__init__()

self.downsample = downsample
-self.conv1 = blocks.ConvBnAct(in_channel=in_channel, out_channel=out_channel//4, kernel_size=1, stride=1, padding=0, bias=False, act='relu', groups=1, num_feat=out_channel//4)
-self.conv2 = blocks.ConvBnAct(in_channel=out_channel//4, out_channel=out_channel//4, kernel_size=3, stride=2 if downsample else 1, padding=1, bias=False, act='relu', groups=1, num_feat=out_channel//4)
-self.conv3 = blocks.ConvBn(in_channel=out_channel//4, out_channel=out_channel, kernel_size=1, stride=1, padding=0, bias=False, groups=1, num_feat=out_channel)
+self.conv1 = blocks.ConvBnAct(in_channel=in_channel, out_channel=out_channel//4, kernel_size=1, stride=1, padding=0, bias=False, act='relu', groups=1)
+self.conv2 = blocks.ConvBnAct(in_channel=out_channel//4, out_channel=out_channel//4, kernel_size=3, stride=2 if downsample else 1, padding=1, bias=False, act='relu', groups=1)
+self.conv3 = blocks.ConvBn(in_channel=out_channel//4, out_channel=out_channel, kernel_size=1, stride=1, padding=0, bias=False, groups=1)

if self.downsample or in_channel != out_channel:
-self.identity = blocks.ConvBn(in_channel=in_channel, out_channel=out_channel, kernel_size=1, stride=2 if downsample else 1, padding=0, bias=False, groups=1, num_feat=out_channel)
+self.identity = blocks.ConvBn(in_channel=in_channel, out_channel=out_channel, kernel_size=1, stride=2 if downsample else 1, padding=0, bias=False, groups=1)
else:
self.identity = nn.Sequential()
self.relu = nn.ReLU()