reformat code

lucasxlu · Sep 19, 2020 · 75f621a · 75f621a
1 parent 9e2aecf
commit 75f621a
Show file tree

Hide file tree

Showing 18 changed files with 1,241 additions and 2,467 deletions.
diff --git a/research/cbir/README.md b/research/cbir/README.md
@@ -6,15 +6,15 @@ Exploring deep metric learning & hash methods to build efficient visual search s
 
 ![index](https://raw.githubusercontent.com/lucasxlu/CbirAnnoTool/master/index.png)
 
-### Backbone
-| Architecture | Supervision | Status |
-| :---: |:---: |:---: |
-| DenseNet121 | Softmax | [YES] |
-| DenseNet121 | CenterLoss | [YES] |
-| DenseNet121 | A-Softmax | [YES] |
-| DenseNet121 | ArcLoss | [YES] |
-| ResNeXt50 | A-Softmax | [TODO] |
-| SeResNeXt50 | A-Softmax | [TODO] |
+### Architecture and Loss
+#### Supported Architecture
+- [x] DenseNet121
+
+#### Supported Loss
+- [x] SoftmaxLoss
+- [x] CenterLoss
+- [x] ASoftmaxLoss
+- [x] ArcLoss
 
 
 ### Dependency
@@ -35,6 +35,7 @@ After manually cleaning noise samples in each cluster, you can upgrade your embe
 4. Wang F, Xiang X, Cheng J, Yuille AL. [Normface: L2 hypersphere embedding for face verification](https://arxiv.org/pdf/1704.06369v4.pdf). In Proceedings of the 2017 ACM on Multimedia Conference 2017 Oct 23 (pp. 1041-1049). ACM.
 5. Liu, Weiyang, et al. ["Sphereface: Deep hypersphere embedding for face recognition."](http://openaccess.thecvf.com/content_cvpr_2017/papers/Liu_SphereFace_Deep_Hypersphere_CVPR_2017_paper.pdf) The IEEE Conference on Computer Vision and Pattern Recognition (CVPR). Vol. 1. 2017.
 6. Hadsell, Raia, Sumit Chopra, and Yann LeCun. ["Dimensionality reduction by learning an invariant mapping."](http://www.cs.toronto.edu/~hinton/csc2535/readings/hadsell-chopra-lecun-06-1.pdf) 2006 IEEE Computer Society Conference on Computer Vision and Pattern Recognition (CVPR'06). IEEE, 2006.
+7. Luo H, Gu Y, Liao X, et al. [Bag of tricks and a strong baseline for deep person re-identification](http://openaccess.thecvf.com/content_CVPRW_2019/papers/TRMTMCT/Luo_Bag_of_Tricks_and_a_Strong_Baseline_for_Deep_Person_CVPRW_2019_paper.pdf)[C]//Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops. 2019.
 
 
 ## License

diff --git a/research/cbir/cfg.py b/research/cbir/cfg.py
@@ -1,13 +1,11 @@
 from collections import OrderedDict
 
 cfg = OrderedDict()
-cfg['tissue_physiology_img_base'] = '/data/lucasxu/Dataset/TissuePhysiologySku'
-cfg['light_clothing_img_base'] = '/data/lucasxu/Dataset/LightClothingSku'
+cfg['img_base'] = '/data/lucasxu/Dataset/ImageDataset'
 cfg['epoch'] = 300
 cfg['init_lr'] = 0.01
 cfg['lr_decay_step'] = 50
 cfg['weight_decay'] = 1e-4
-cfg['out_num'] = 51
-# cfg['out_num'] = 162
-cfg['batch_size'] = 128
-cfg['data_aug_samples'] = 1000
+cfg['out_num'] = 425
+cfg['batch_size'] = 64
+cfg['data_aug_samples'] = 1000
diff --git a/research/cbir/clustering.py b/research/cbir/clustering.py
@@ -2,29 +2,29 @@
 # Please carefully tune the hyper-param k, to make sure that max(x_i - \mu_i) <= \tau (such as 0.3)
 # to avoid introducing noise
 # author: @LucasX
-import os
-import math
 import argparse
-
-from PIL import Image
-from sklearn.cluster import KMeans
-import numpy as np
+import math
+import os
 import shutil
-from skimage import io
 
+import numpy as np
 import torch
 import torch.nn as nn
-from torch.nn import Parameter
 import torch.nn.functional as F
+from PIL import Image
+from skimage import io
 from skimage.color import gray2rgb, rgba2rgb
+from sklearn.cluster import KMeans
+from torch import Tensor
+from torch.nn import Parameter
 from torchvision import models
 from torchvision.transforms import transforms
 
 parser = argparse.ArgumentParser()
 parser.add_argument('-k', type=int)
 parser.add_argument('-out_num', type=int, default=369)
 parser.add_argument('-checkpoint', type=str,
-                    default='/data/lucasxu/ModelZoo/DenseNet121_Embedding_AngularLoss.pth')
+                    default='/data/lucasxu/ModelZoo/DenseNet121_Embedding_ASoftmaxLoss.pth')
 parser.add_argument('-max_l1_dist', type=float)
 parser.add_argument('-use_gpu', type=bool, default=True)
 parser.add_argument('-algorithm', type=str, default='kmeans')
@@ -42,29 +42,55 @@ def myphi(x, m):
            x ** 8 / math.factorial(8) - x ** 9 / math.factorial(9)
 
 
+class Flatten(nn.Module):
+    """
+    Self-constructed Flatten module: flattens dims start_dim..end_dim of the input.
+    Note: on PyTorch 1.5 or higher, use nn.Flatten() directly instead.
+    """
+
+    __constants__ = ['start_dim', 'end_dim']
+    start_dim: int
+    end_dim: int
+
+    def __init__(self, start_dim: int = 1, end_dim: int = -1) -> None:
+        super(Flatten, self).__init__()
+        self.start_dim = start_dim
+        self.end_dim = end_dim
+
+    def forward(self, input: Tensor) -> Tensor:
+        return input.flatten(self.start_dim, self.end_dim)
+
+
 class DenseNet121(nn.Module):
     """
-    DenseNet with features, constructed for AngularLoss
+    DenseNet121 as backbone, constructed for ASoftmaxLoss
     """
 
-    def __init__(self, num_cls):
+    def __init__(self, num_cls, embedding_dim=1024):
         super(DenseNet121, self).__init__()
         self.__class__.__name__ = 'DenseNet121'
         densenet121 = models.densenet121(pretrained=True)
+        self.features = densenet121.features
         num_ftrs = densenet121.classifier.in_features
-        densenet121.classifier = nn.Sequential(nn.Linear(num_ftrs, 1024), AngleLinear(1024, num_cls))
-        self.model = densenet121
+        self.embedding = nn.Sequential(
+            nn.ReLU(inplace=True),
+            nn.AdaptiveAvgPool2d((1, 1)),
+            Flatten(),
+            nn.Linear(num_ftrs, embedding_dim, bias=False),
+            nn.BatchNorm1d(embedding_dim)
+        )
+        self.classifier = AngleLinear(embedding_dim, num_cls)
 
     def forward(self, x):
-        for name, module in self.model.named_children():
-            if name == 'features':
-                feats = module(x)
-                feats = F.relu(feats, inplace=True)
-                feats = F.avg_pool2d(feats, kernel_size=7, stride=1).view(feats.size(0), -1)
-            elif name == 'classifier':
-                out = module(feats)
+        """
+        feedforward an image, return pooling features (with BNNeck) and logits before softmax layer
+        :param x:
+        :return:
+        """
+        feats = self.embedding(self.features(x))
+        logits = self.classifier(feats)
 
-        return feats, out
+        return feats, logits
 
     def num_flat_features(self, x):
         size = x.size()[1:]  # all dimensions except the batch dimension
@@ -95,13 +121,13 @@ def __init__(self, in_features, out_features, m=4, phiflag=True):
 
     def forward(self, input):
         x = input  # size=(B,F)    F is feature len
-        w = self.weight  # size=(F,Classnum) F=in_features Classnum=out_features
+        w = self.weight  # size=(F, ClassNum) F=in_features  ClassNum=out_features
 
         ww = w.renorm(2, 1, 1e-5).mul(1e5)
         xlen = x.pow(2).sum(1).pow(0.5)  # size=B
-        wlen = ww.pow(2).sum(0).pow(0.5)  # size=Classnum
+        wlen = ww.pow(2).sum(0).pow(0.5)  # size= ClassNum
 
-        cos_theta = x.mm(ww)  # size=(B,Classnum)
+        cos_theta = x.mm(ww)  # size=(B, ClassNum)
         cos_theta = cos_theta / xlen.view(-1, 1) / wlen.view(1, -1)
         cos_theta = cos_theta.clamp(-1, 1)
 
@@ -120,44 +146,16 @@ def forward(self, input):
         phi_theta = phi_theta * xlen.view(-1, 1)
         output = (cos_theta, phi_theta)
 
-        return output  # size=(B,Classnum,2)
-
-
-class AngularLoss(nn.Module):
-    def __init__(self, gamma=0):
-        super(AngularLoss, self).__init__()
-        self.gamma = gamma
-        self.it = 0
-        self.LambdaMin = 5.0
-        self.LambdaMax = 1500.0
-        self.lamb = 1500.0
-
-    def forward(self, input, target):
-        self.it += 1
-        cos_theta, phi_theta = input
-        target = target.view(-1, 1)  # size=(B,1)
-
-        index = cos_theta.data * 0.0  # size=(B,Classnum)
-        index.scatter_(1, target.data.view(-1, 1), 1)
-        index = index.byte()
-
-        self.lamb = max(self.LambdaMin, self.LambdaMax / (1 + 0.1 * self.it))
-        output = cos_theta * 1.0  # size=(B,Classnum)
-        output[index] -= cos_theta[index] * (1.0 + 0) / (1 + self.lamb)
-        output[index] += phi_theta[index] * (1.0 + 0) / (1 + self.lamb)
-
-        logpt = F.log_softmax(output, dim=0)
-        logpt = logpt.gather(1, target)
-        logpt = logpt.view(-1)
-        pt = logpt.data.exp()
-
-        loss = -1 * (1 - pt) ** self.gamma * logpt
-        loss = loss.mean()
-
-        return loss
+        return output  # size=(B, ClassNum, 2)
 
 
 def load_model_with_weights(model, model_path):
+    """
+    load model with pretrained checkpoint
+    :param model:
+    :param model_path:
+    :return:
+    """
     print(model)
     model = model.float()
     model_name = model.__class__.__name__