diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..adc29960 --- /dev/null +++ b/.gitignore @@ -0,0 +1,165 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Mac OS files +.DS_Store +.AppleDouble +.LSOverride + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
+#.idea/ \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..9fdea7f2 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 Aleksandr Karakulev + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 00000000..4834fd6f --- /dev/null +++ b/README.md @@ -0,0 +1,55 @@ +# RLVI: Robust Learning via Variational Inference + +Implementation of [Adaptive Robust Learning using Latent Bernoulli Variables](https://arxiv.org/pdf/2312.00585). + +Our method, called RLVI, enables robust maximization of the likelihood. The learned parametric model is thus accurate even when the training data is corrupted. Additionally, RLVI is well-suited for online or stochastic optimization as it does not require estimating the total ratio of contaminated data and adaptively infers the probabilities of sample corruption. + + +## Standard parameter estimation +Benchmark, reproduced from [(Osama et al., 2020)](https://doi.org/10.1109/OJSP.2020.3039632), that compares robust learning algorithms on the four test problems with corrupted samples in the training data: linear and logistic regression, principal component analysis, covariance estimation. +``` +cd standard-learning +python3 main.py +``` + +## Online learning +Binary classification for Human Activity Recognition dataset [(Amine El Helou, 2023)](https://www.mathworks.com/matlabcentral/fileexchange/54138-sensor-har-recognition-app), performed in batches with varying level of corruption to simulate the online learning setting. +``` +cd online-learning +python3 main.py +``` + +Accuracy levels are higher with RLVI than with stochastic likelihood maximization (SGD). +
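The online-learning script itself (`online-learning/main.py`) is not shown in this diff. Purely as a schematic sketch of the idea behind the comparison — scaling each sample's loss by its inferred probability of being clean before each stochastic update — and not the repository's actual RLVI implementation:
```
# Schematic sketch only -- not the RLVI update from the paper or this repository.
# Assumes `model` and `optimizer` exist and that `weights` holds, for each sample
# in the batch, the inferred probability that its label is clean.
import torch
import torch.nn.functional as F

def weighted_step(model, optimizer, x_batch, y_batch, weights):
    optimizer.zero_grad()
    logits = model(x_batch)
    per_sample_loss = F.cross_entropy(logits, y_batch, reduction='none')
    # Samples suspected to be corrupted (small weight) barely affect the gradient.
    (weights * per_sample_loss).mean().backward()
    optimizer.step()
```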
+ +
+ +## Deep learning +### Synthetic noise (MNIST, CIFAR10, CIFAR100) +Experiments with the datasets in which training data is corrupted with synthetic noise. +There are four types of noise: `symmetric`, `asymmetric`, `pairflip`, and `instance`. Noise rate from 0 to 1 needs to be specified to corrupt the training set. +For the method, one can use `rlvi` or one of the following: `regular`, `coteaching` [(Han et al., 2018)](https://papers.nips.cc/paper_files/paper/2018/hash/a19744e268754fb0148b017647355b7b-Abstract.html), `jocor` [(Wei et al., 2020)](https://openaccess.thecvf.com/content_CVPR_2020/papers/Wei_Combating_Noisy_Labels_by_Agreement_A_Joint_Training_Method_with_CVPR_2020_paper.pdf), `cdr` [(Xia et al., 2020)](https://openreview.net/forum?id=Eql5b1_hTE4), `usdnl` [(Xu et al., 2023)](https://doi.org/10.1609/aaai.v37i9.26264), and `bare` [(Patel & Sastry, 2023)](https://openaccess.thecvf.com/content/WACV2023/papers/Patel_Adaptive_Sample_Selection_for_Robust_Learning_Under_Label_Noise_WACV_2023_paper.pdf). + +Example: +``` +cd deep-learning +python3 main.py \ + --method=rlvi \ + --dataset=mnist \ + --noise_type=pairflip \ + --noise_rate=0.45 +``` + ++ +
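The synthetic corruption itself is generated by helpers added in `deep-learning/data_tools.py` later in this diff. As a minimal usage sketch (run from the `deep-learning/` directory; the column-shaped label array mirrors how `dataset_split` passes labels internally):
```
# Minimal sketch: flip labels with symmetric noise at rate 0.45 using the helper
# defined in deep-learning/data_tools.py (added in this PR).
import numpy as np
import data_tools as tools

y_clean = np.random.randint(0, 10, size=(1000, 1))   # integer class labels as a column vector
y_noisy, actual_rate, P = tools.noisify_multiclass_symmetric(
    y_clean, noise=0.45, random_state=1, nb_classes=10
)
print(actual_rate)   # empirical fraction of flipped labels, close to 0.45
```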
+ +### Real noise (Food101) +Experiments with the dataset in which training data is corrupted by nature: some of the training images are mislabeled and contain some noise. +For the method, one can specify `rlvi` or one of the following: `regular`, `coteaching`, `jocor`, `cdr`, `usdnl`, `bare`. + +Example: +``` +cd deep-learning +python3 food.py --method=rlvi +``` diff --git a/Roboto.ttf b/Roboto.ttf new file mode 100644 index 00000000..0a1657d2 Binary files /dev/null and b/Roboto.ttf differ diff --git a/deep-learning/data_load.py b/deep-learning/data_load.py new file mode 100644 index 00000000..51a4aa1c --- /dev/null +++ b/deep-learning/data_load.py @@ -0,0 +1,515 @@ +import numpy as np +import torch +import torch.utils.data as Data +from PIL import Image +import os +import sys +if sys.version_info[0] == 2: + import cPickle as pickle +else: + import pickle + +import data_tools as tools + + +class Mnist(Data.Dataset): + urls = [ + 'http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz', + 'http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz', + 'http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz', + 'http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz', + ] + raw_folder = 'raw' + processed_folder = 'processed' + training_file = 'training.pt' + test_file = 'test.pt' + + def __init__(self, root, + train=True, transform=None, target_transform=None, download=False, + dataset='mnist', noise_type='symmetric', noise_rate=0.5, split_per=0.9, random_seed=1, num_class=10): + self.root = os.path.join(os.path.expanduser(root), 'MNIST') + self.transform = transform + self.target_transform = target_transform + self.train = train + + if download: + self.download() + + if not self._check_exists(): + raise RuntimeError('Dataset not found.' 
+ + ' You can use download=True to download it') + + self.train_data, self.train_labels = torch.load( + os.path.join(self.root, self.processed_folder, self.training_file)) + self.train_labels = np.array(self.train_labels) + + # clean images and noisy labels (training and validation) + if noise_rate > 0: + self.train_data, self.val_data, self.train_labels, self.val_labels, self.noise_mask = tools.dataset_split( + self.train_data, self.train_labels, dataset, noise_type, noise_rate, split_per, random_seed, num_class + ) + else: + self.train_data, self.val_data, self.train_labels, self.val_labels, self.noise_mask = tools.dataset_split_without_noise( + self.train_data, self.train_labels, split_per, random_seed + ) + + def __getitem__(self, index): + if self.train: + img, label = self.train_data[index], self.train_labels[index] + else: + img, label = self.val_data[index], self.val_labels[index] + + img = Image.fromarray(img.numpy()) + + if self.transform is not None: + img = self.transform(img) + + if self.target_transform is not None: + label = self.target_transform(label) + + return img, label, index + + def __len__(self): + if self.train: + return len(self.train_data) + else: + return len(self.val_data) + + def _check_exists(self): + return os.path.exists(os.path.join(self.root, self.processed_folder, self.training_file)) and \ + os.path.exists(os.path.join(self.root, self.processed_folder, self.test_file)) + + def download(self): + """Download the MNIST data if it doesn't exist in processed_folder already.""" + from six.moves import urllib + import gzip + + if self._check_exists(): + return + + # download files + try: + os.makedirs(os.path.join(self.root, self.raw_folder)) + os.makedirs(os.path.join(self.root, self.processed_folder)) + except OSError as e: + if e.errno == errno.EEXIST: + pass + else: + raise + + for url in self.urls: + print('Downloading ' + url) + data = urllib.request.urlopen(url) + filename = url.rpartition('/')[2] + file_path = os.path.join(self.root, self.raw_folder, filename) + with open(file_path, 'wb') as f: + f.write(data.read()) + with open(file_path.replace('.gz', ''), 'wb') as out_f, \ + gzip.GzipFile(file_path) as zip_f: + out_f.write(zip_f.read()) + os.unlink(file_path) + + # process and save as torch files + print('Processing...') + + training_set = ( + tools.read_image_file(os.path.join(self.root, self.raw_folder, 'train-images-idx3-ubyte')), + tools.read_label_file(os.path.join(self.root, self.raw_folder, 'train-labels-idx1-ubyte')) + ) + test_set = ( + tools.read_image_file(os.path.join(self.root, self.raw_folder, 't10k-images-idx3-ubyte')), + tools.read_label_file(os.path.join(self.root, self.raw_folder, 't10k-labels-idx1-ubyte')) + ) + with open(os.path.join(self.root, self.processed_folder, self.training_file), 'wb') as f: + torch.save(training_set, f) + with open(os.path.join(self.root, self.processed_folder, self.test_file), 'wb') as f: + torch.save(test_set, f) + + print('Done!') + + def __repr__(self): + fmt_str = 'Dataset ' + self.__class__.__name__ + '\n' + fmt_str += ' Number of datapoints: {}\n'.format(self.__len__()) + tmp = 'train' if self.train is True else 'test' + fmt_str += ' Split: {}\n'.format(tmp) + fmt_str += ' Root Location: {}\n'.format(self.root) + tmp = ' Transforms (if any): ' + fmt_str += '{0}{1}\n'.format(tmp, self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp))) + tmp = ' Target Transforms (if any): ' + fmt_str += '{0}{1}'.format(tmp, self.target_transform.__repr__().replace('\n', '\n' + ' ' * len(tmp))) + return 
fmt_str + + +class MnistTest(Data.Dataset): + processed_folder = 'processed' + test_file = 'test.pt' + + def __init__(self, root, transform=None, target_transform=None): + self.root = os.path.join(os.path.expanduser(root), 'MNIST') + self.transform = transform + self.target_transform = target_transform + + self.test_data, self.test_labels = torch.load( + os.path.join(self.root, self.processed_folder, self.test_file)) + self.test_labels = np.array(self.test_labels) + + + def __getitem__(self, index): + img, label = self.test_data[index], self.test_labels[index] + img = Image.fromarray(img.numpy()) + + if self.transform is not None: + img = self.transform(img) + + if self.target_transform is not None: + label = self.target_transform(label) + + return img, label, index + + def __len__(self): + return len(self.test_data) + + +class Cifar10(Data.Dataset): + base_folder = 'cifar-10-batches-py' + url = "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz" + filename = "cifar-10-python.tar.gz" + tgz_md5 = 'c58f30108f718f92721af3b95e74349a' + train_list = [ + ['data_batch_1', 'c99cafc152244af753f735de768cd75f'], + ['data_batch_2', 'd4bba439e000b95fd0a9bffe97cbabec'], + ['data_batch_3', '54ebc095f3ab1f0389bbae665268c751'], + ['data_batch_4', '634d18415352ddfa80567beed471001a'], + ['data_batch_5', '482c414d41f54cd18b22e5b47cb7c3cb'], + ] + + test_list = [ + ['test_batch', '40351d587109b95175f43aff81a1287e'], + ] + + def __init__(self, root, train=True, transform=None, target_transform=None, download=False, + dataset='cifar10', noise_type='symmetric', noise_rate=0.5, + split_per=0.9, random_seed=1, num_class=10 + ): + self.root = os.path.join(os.path.expanduser(root), 'CIFAR') + self.transform = transform + self.target_transform = target_transform + self.train = train + + if download: + self.download() + + if not self._check_integrity(): + raise RuntimeError('Dataset not found or corrupted.' 
+ + ' You can use download=True to download it') + + # now load the picked numpy arrays + self.train_data = [] + self.train_labels = [] + for fentry in self.train_list: + f = fentry[0] + file = os.path.join(self.root, self.base_folder, f) + fo = open(file, 'rb') + if sys.version_info[0] == 2: + entry = pickle.load(fo) + else: + entry = pickle.load(fo, encoding='latin1') + self.train_data.append(entry['data']) + if 'labels' in entry: + self.train_labels += entry['labels'] + else: + self.train_labels += entry['fine_labels'] + fo.close() + + self.train_data = np.concatenate(self.train_data) + self.train_labels = np.array(self.train_labels) + # self.train_data = self.train_data.reshape((50000, 3, 32, 32)) + # self.train_data = self.train_data.transpose((0, 2, 3, 1)) # convert to HWC + + # clean images and noisy labels (training and validation) + if noise_rate > 0: + self.train_data, self.val_data, self.train_labels, self.val_labels, self.noise_mask = tools.dataset_split( + self.train_data, self.train_labels, dataset, noise_type, noise_rate, split_per, random_seed, num_class + ) + else: + self.train_data, self.val_data, self.train_labels, self.val_labels, self.noise_mask = tools.dataset_split_without_noise( + self.train_data, self.train_labels, split_per, random_seed + ) + + if self.train: + self.train_data = self.train_data.reshape((-1, 3, 32, 32)) + self.train_data = self.train_data.transpose((0, 2, 3, 1)) + + else: + self.val_data = self.val_data.reshape((-1, 3, 32, 32)) + self.val_data = self.val_data.transpose((0, 2, 3, 1)) + + def __getitem__(self, index): + + if self.train: + img, label = self.train_data[index], self.train_labels[index] + + else: + img, label = self.val_data[index], self.val_labels[index] + + img = Image.fromarray(img) + + if self.transform is not None: + img = self.transform(img) + + if self.target_transform is not None: + label = self.target_transform(label) + + return img, label, index + + def __len__(self): + if self.train: + return len(self.train_data) + else: + return len(self.val_data) + + def _check_integrity(self): + root = self.root + for fentry in (self.train_list + self.test_list): + filename, md5 = fentry[0], fentry[1] + fpath = os.path.join(root, self.base_folder, filename) + if not tools.check_integrity(fpath, md5): + return False + return True + + def download(self): + import tarfile + + if self._check_integrity(): + print('Files already downloaded and verified') + return + + root = self.root + tools.download_url(self.url, root, self.filename, self.tgz_md5) + + # extract file + cwd = os.getcwd() + tar = tarfile.open(os.path.join(root, self.filename), "r:gz") + os.chdir(root) + tar.extractall() + tar.close() + os.chdir(cwd) + + +class Cifar10Test(Data.Dataset): + base_folder = 'cifar-10-batches-py' + test_list = [ + ['test_batch', '40351d587109b95175f43aff81a1287e'], + ] + + def __init__(self, root, transform=None, target_transform=None): + self.root = os.path.join(os.path.expanduser(root), 'CIFAR') + self.transform = transform + self.target_transform = target_transform + + f = self.test_list[0][0] + file = os.path.join(self.root, self.base_folder, f) + fo = open(file, 'rb') + if sys.version_info[0] == 2: + entry = pickle.load(fo) + else: + entry = pickle.load(fo, encoding='latin1') + self.test_data = entry['data'] + if 'labels' in entry: + self.test_labels = entry['labels'] + else: + self.test_labels = entry['fine_labels'] + fo.close() + self.test_data = self.test_data.reshape((-1, 3, 32, 32)) + self.test_data = self.test_data.transpose((0, 2, 3, 1)) + 
self.test_labels = np.array(self.test_labels) + + def __getitem__(self, index): + img, label = self.test_data[index], self.test_labels[index] + img = Image.fromarray(img) + + if self.transform is not None: + img = self.transform(img) + + if self.target_transform is not None: + label = self.target_transform(label) + + return img, label, index + + def __len__(self): + return len(self.test_data) + + +class Cifar100(Data.Dataset): + + base_folder = 'cifar-100-python' + url = "https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz" + filename = "cifar-100-python.tar.gz" + tgz_md5 = 'eb9058c3a382ffc7106e4002c42a8d85' + train_list = [ + ['train', '16019d7e3df5f24257cddd939b257f8d'], + ] + + test_list = [ + ['test', 'f0ef6b0ae62326f3e7ffdfab6717acfc'], + ] + + def __init__(self, root, train=True, transform=None, target_transform=None, download=False, + dataset='cifar100', noise_type='symmetric', noise_rate=0.5, + split_per=0.9, random_seed=1, num_class=100 + ): + self.root = os.path.join(os.path.expanduser(root), 'CIFAR') + self.transform = transform + self.target_transform = target_transform + self.train = train + + if download: + self.download() + + if not self._check_integrity(): + raise RuntimeError('Dataset not found or corrupted.' + + ' You can use download=True to download it') + + # now load the picked numpy arrays + self.train_data = [] + self.train_labels = [] + for fentry in self.train_list: + f = fentry[0] + file = os.path.join(self.root, self.base_folder, f) + fo = open(file, 'rb') + if sys.version_info[0] == 2: + entry = pickle.load(fo) + else: + entry = pickle.load(fo, encoding='latin1') + self.train_data.append(entry['data']) + if 'labels' in entry: + self.train_labels += entry['labels'] + else: + self.train_labels += entry['fine_labels'] + fo.close() + + self.train_data = np.concatenate(self.train_data) + self.train_labels = np.array(self.train_labels) + + # clean images and noisy labels (training and validation) + if noise_rate > 0: + self.train_data, self.val_data, self.train_labels, self.val_labels, self.noise_mask = tools.dataset_split( + self.train_data, self.train_labels, dataset, noise_type, noise_rate, split_per, random_seed, num_class + ) + else: + self.train_data, self.val_data, self.train_labels, self.val_labels, self.noise_mask = tools.dataset_split_without_noise( + self.train_data, self.train_labels, split_per, random_seed + ) + + if self.train: + self.train_data = self.train_data.reshape((-1, 3, 32, 32)) + self.train_data = self.train_data.transpose((0, 2, 3, 1)) + + else: + self.val_data = self.val_data.reshape((-1, 3, 32, 32)) + self.val_data = self.val_data.transpose((0, 2, 3, 1)) + + def __getitem__(self, index): + if self.train: + img, label = self.train_data[index], self.train_labels[index] + else: + img, label = self.val_data[index], self.val_labels[index] + + img = Image.fromarray(img) + + if self.transform is not None: + img = self.transform(img) + + if self.target_transform is not None: + label = self.target_transform(label) + + return img, label, index + + def __len__(self): + if self.train: + return len(self.train_data) + else: + return len(self.val_data) + + def _check_integrity(self): + root = self.root + for fentry in (self.train_list + self.test_list): + filename, md5 = fentry[0], fentry[1] + fpath = os.path.join(root, self.base_folder, filename) + if not tools.check_integrity(fpath, md5): + return False + return True + + def download(self): + import tarfile + + if self._check_integrity(): + print('Files already downloaded and verified') + return + 
+ root = self.root + tools.download_url(self.url, root, self.filename, self.tgz_md5) + + # extract file + cwd = os.getcwd() + tar = tarfile.open(os.path.join(root, self.filename), "r:gz") + os.chdir(root) + tar.extractall() + tar.close() + os.chdir(cwd) + + def __repr__(self): + fmt_str = 'Dataset ' + self.__class__.__name__ + '\n' + fmt_str += ' Number of datapoints: {}\n'.format(self.__len__()) + tmp = 'train' if self.train is True else 'test' + fmt_str += ' Split: {}\n'.format(tmp) + fmt_str += ' Root Location: {}\n'.format(self.root) + tmp = ' Transforms (if any): ' + fmt_str += '{0}{1}\n'.format(tmp, self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp))) + tmp = ' Target Transforms (if any): ' + fmt_str += '{0}{1}'.format(tmp, self.target_transform.__repr__().replace('\n', '\n' + ' ' * len(tmp))) + return fmt_str + + +class Cifar100Test(Data.Dataset): + + base_folder = 'cifar-100-python' + test_list = [ + ['test', 'f0ef6b0ae62326f3e7ffdfab6717acfc'], + ] + + def __init__(self, root, transform=None, target_transform=None): + self.root = os.path.join(os.path.expanduser(root), 'CIFAR') + self.transform = transform + self.target_transform = target_transform + + f = self.test_list[0][0] + file = os.path.join(self.root, self.base_folder, f) + fo = open(file, 'rb') + if sys.version_info[0] == 2: + entry = pickle.load(fo) + else: + entry = pickle.load(fo, encoding='latin1') + self.test_data = entry['data'] + if 'labels' in entry: + self.test_labels = entry['labels'] + else: + self.test_labels = entry['fine_labels'] + fo.close() + self.test_data = self.test_data.reshape((-1, 3, 32, 32)) + self.test_data = self.test_data.transpose((0, 2, 3, 1)) + self.test_labels = np.array(self.test_labels) + + def __getitem__(self, index): + img, label = self.test_data[index], self.test_labels[index] + img = Image.fromarray(img) + if self.transform is not None: + img = self.transform(img) + + if self.target_transform is not None: + label = self.target_transform(label) + + return img, label, index + + def __len__(self): + return len(self.test_data) diff --git a/deep-learning/data_tools.py b/deep-learning/data_tools.py new file mode 100644 index 00000000..039a0062 --- /dev/null +++ b/deep-learning/data_tools.py @@ -0,0 +1,503 @@ +import os +import hashlib +import errno +import codecs + +import numpy as np +from numpy.testing import assert_array_almost_equal +from scipy import stats + +import torch +import torch.nn.functional as F +import torch.nn as nn + + +DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + +def get_int(b): + return int(codecs.encode(b, 'hex'), 16) + + +def read_label_file(path): + with open(path, 'rb') as f: + data = f.read() + assert get_int(data[:4]) == 2049 + length = get_int(data[4:8]) + parsed = np.frombuffer(data, dtype=np.uint8, offset=8) + parsed = np.array(parsed) + return torch.from_numpy(parsed).view(length).long() + + +def read_image_file(path): + with open(path, 'rb') as f: + data = f.read() + assert get_int(data[:4]) == 2051 + length = get_int(data[4:8]) + num_rows = get_int(data[8:12]) + num_cols = get_int(data[12:16]) + parsed = np.frombuffer(data, dtype=np.uint8, offset=16) + parsed = np.array(parsed) + return torch.from_numpy(parsed).view(length, num_rows, num_cols) + + +def check_integrity(fpath, md5): + if not os.path.isfile(fpath): + return False + md5o = hashlib.md5() + with open(fpath, 'rb') as f: + # read in 1MB chunks + for chunk in iter(lambda: f.read(1024 * 1024), b''): + md5o.update(chunk) + md5c = md5o.hexdigest() + if md5c != md5: + 
return False + return True + + +def download_url(url, root, filename, md5): + from six.moves import urllib + + root = os.path.expanduser(root) + fpath = os.path.join(root, filename) + + try: + os.makedirs(root) + except OSError as e: + if e.errno == errno.EEXIST: + pass + else: + raise + + # downloads file + if os.path.isfile(fpath) and check_integrity(fpath, md5): + print('Using downloaded and verified file: ' + fpath) + else: + try: + print('Downloading ' + url + ' to ' + fpath) + urllib.request.urlretrieve(url, fpath) + except: + if url[:5] == 'https': + url = url.replace('https:', 'http:') + print('Failed download. Trying https -> http instead.' + ' Downloading ' + url + ' to ' + fpath) + urllib.request.urlretrieve(url, fpath) + + +def list_dir(root, prefix=False): + """List all directories at a given root + Args: + root (str): Path to directory whose folders need to be listed + prefix (bool, optional): If true, prepends the path to each result, otherwise + only returns the name of the directories found + """ + root = os.path.expanduser(root) + directories = list( + filter( + lambda p: os.path.isdir(os.path.join(root, p)), + os.listdir(root) + ) + ) + + if prefix is True: + directories = [os.path.join(root, d) for d in directories] + + return directories + + +def list_files(root, suffix, prefix=False): + """List all files ending with a suffix at a given root + Args: + root (str): Path to directory whose folders need to be listed + suffix (str or tuple): Suffix of the files to match, e.g. '.png' or ('.jpg', '.png'). + It uses the Python "str.endswith" method and is passed directly + prefix (bool, optional): If true, prepends the path to each result, otherwise + only returns the name of the files found + """ + root = os.path.expanduser(root) + files = list( + filter( + lambda p: os.path.isfile(os.path.join(root, p)) and p.endswith(suffix), + os.listdir(root) + ) + ) + + if prefix is True: + files = [os.path.join(root, d) for d in files] + + return files + + +# basic function +def multiclass_noisify(y, P, random_state=1): + """ Flip classes according to transition probability matrix T. + It expects a number between 0 and the number of classes - 1. + """ + assert P.shape[0] == P.shape[1] + assert np.max(y) < P.shape[0] + + # row stochastic matrix + assert_array_almost_equal(P.sum(axis=1), np.ones(P.shape[1])) + assert (P >= 0.0).all() + + m = y.shape[0] + new_y = y.copy() + flipper = np.random.RandomState(random_state) + + for idx in np.arange(m): + i = y[idx] + # draw a vector with only an 1 + flipped = flipper.multinomial(1, P[i, :][0], 1)[0] + new_y[idx] = np.where(flipped == 1)[0] + + return new_y + + +def noisify_pairflip(y_train, noise, random_state=1, nb_classes=10): + """mistakes: + flip in the pair + """ + P = np.eye(nb_classes) + n = noise + + if n > 0.0: + # 0 -> 1 + P[0, 0], P[0, 1] = 1. - n, n + for i in range(1, nb_classes-1): + P[i, i], P[i, i + 1] = 1. - n, n + P[nb_classes-1, nb_classes-1], P[nb_classes-1, 0] = 1. - n, n + + y_train_noisy = multiclass_noisify(y_train, P=P, + random_state=random_state) + actual_noise = (y_train_noisy != y_train).mean() + assert actual_noise > 0.0 + print('Actual noise %.2f' % actual_noise) + y_train = y_train_noisy + + return y_train, actual_noise,P + +def noisify_multiclass_symmetric(y_train, noise, random_state=None, nb_classes=10): + """mistakes: + flip in the symmetric way + """ + P = np.ones((nb_classes, nb_classes)) + n = noise + P = (n / (nb_classes - 1)) * P + + if n > 0.0: + # 0 -> 1 + P[0, 0] = 1. 
- n + for i in range(1, nb_classes-1): + P[i, i] = 1. - n + P[nb_classes-1, nb_classes-1] = 1. - n + + y_train_noisy = multiclass_noisify(y_train, P=P, + random_state=random_state) + actual_noise = (y_train_noisy != y_train).mean() + assert actual_noise > 0.0 + print('Actual noise %.2f' % actual_noise) + y_train = y_train_noisy + + return y_train, actual_noise,P + +def noisify_trid(y_train, noise, random_state=1, nb_classes=10): + """mistakes: + flip in the trid + """ + P = np.eye(nb_classes) + n = noise + + if n > 0.0: + # 0 -> 1 + P[0, 0], P[0, 1], P[0, nb_classes-1] = 1. - n, n / 2, n /2 + for i in range(1, nb_classes-1): + P[i, i], P[i, i + 1], P[i, i - 1] = 1. - n, n / 2, n /2 + P[nb_classes-1, nb_classes-1], P[nb_classes-1, 0], P[nb_classes-1, nb_classes-2] = 1. - n, n / 2, n /2 + + y_train_noisy = multiclass_noisify(y_train, P=P, + random_state=random_state) + actual_noise = (y_train_noisy != y_train).mean() + y_train = y_train_noisy + + return y_train, actual_noise, P + + + +def noisify_multiclass_asymmetric_mnist(y_train, noise, random_state=None, nb_classes=10): + """mistakes: + flip in the symmetric way + """ + P = np.eye(10) + n = noise + + # 2 -> 7 + P[2, 2], P[2, 7] = 1. - n, n + + # 5 <-> 6 + P[5, 5], P[5, 6] = 1. - n, n + P[6, 6], P[6, 5] = 1. - n, n + + # 3 -> 8 + P[3, 3], P[3, 8] = 1. - n, n + + y_train_noisy = multiclass_noisify(y_train, P=P, random_state=random_state) + actual_noise = (y_train_noisy != y_train).mean() + assert actual_noise > 0.0 + print('Actual noise %.2f' % actual_noise) + + y_train = y_train_noisy + + return y_train, actual_noise, P + + +def build_for_cifar100(size, noise): + """ random flip between two random classes. + """ + assert(noise >= 0.) and (noise <= 1.) + + P = (1. - noise) * np.eye(size) + for i in np.arange(size - 1): + P[i, i+1] = noise + + # adjust last row + P[size-1, 0] = noise + + assert_array_almost_equal(P.sum(axis=1), 1, 1) + return P + + +def noisify_multiclass_asymmetric_cifar10(y_train, noise, random_state=None, nb_classes=10): + """mistakes: + flip in the symmetric way + """ + source_class = [9, 2, 3, 5, 4] + target_class = [1, 0, 5, 3, 7] + y_train_ = np.copy(np.array(y_train)) + for s, t in zip(source_class, target_class): + cls_idx = np.where(np.array(y_train) == s)[0] + n_noisy = int(noise * cls_idx.shape[0]) + noisy_sample_index = np.random.choice(cls_idx, n_noisy, replace=False) + for idx in noisy_sample_index: + y_train_[idx] = t + actual_noise = (y_train_ != np.array(y_train)).mean() + print('Actual noise %.2f' % actual_noise) + return y_train_, actual_noise, target_class + +def noisify_multiclass_asymmetric_cifar100(y_train, noise, random_state=None, nb_classes=100): + """mistakes: + flip in the symmetric way + """ + nb_classes = 100 + P = np.eye(nb_classes) + n = noise + nb_superclasses = 20 + nb_subclasses = 5 + + if n > 0.0: + for i in np.arange(nb_superclasses): + init, end = i * nb_subclasses, (i + 1) * nb_subclasses + P[init:end, init:end] = build_for_cifar100(nb_subclasses, n) + + y_train_noisy = multiclass_noisify(np.array(y_train), P=P, random_state=random_state) + actual_noise = (y_train_noisy != np.array(y_train)).mean() + assert actual_noise > 0.0 + print('Actual noise %.2f' % actual_noise) + targets = y_train_noisy + return targets, actual_noise, P + + +def noisify(dataset='mnist', nb_classes=10, train_labels=None, noise_type=None, noise_rate=0, random_state=1): + if noise_type == 'pairflip': + train_noisy_labels, actual_noise_rate = noisify_pairflip(train_labels, noise_rate, random_state=1, 
nb_classes=nb_classes) + if noise_type == 'symmetric': + train_noisy_labels, actual_noise_rate = noisify_multiclass_symmetric(train_labels, noise_rate, random_state=1, nb_classes=nb_classes) + return train_noisy_labels, actual_noise_rate + + +def init_params(net): + '''Init layer parameters.''' + for m in net.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal(m.weight, mode='fan_out') + if m.bias: + nn.init.constant(m.bias, 0) + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant(m.weight, 1) + nn.init.constant(m.bias, 0) + elif isinstance(m, nn.Linear): + nn.init.normal(m.weight, std=1e-3) + if m.bias: + nn.init.constant(m.bias, 0) + + +def transition_matrix_generate(noise_rate=0.5, num_classes=10): + P = np.ones((num_classes, num_classes)) + n = noise_rate + P = (n / (num_classes - 1)) * P + + if n > 0.0: + # 0 -> 1 + P[0, 0] = 1. - n + for i in range(1, num_classes-1): + P[i, i] = 1. - n + P[num_classes-1, num_classes-1] = 1. - n + return P + + +def fit(X, num_classes, filter_outlier=False): + # number of classes + c = num_classes + T = np.empty((c, c)) + eta_corr = X + for i in np.arange(c): + if not filter_outlier: + idx_best = np.argmax(eta_corr[:, i]) + else: + eta_thresh = np.percentile(eta_corr[:, i], 97,interpolation='higher') + robust_eta = eta_corr[:, i] + robust_eta[robust_eta >= eta_thresh] = 0.0 + idx_best = np.argmax(robust_eta) + for j in np.arange(c): + T[i, j] = eta_corr[idx_best, j] + return T + + +# flip clean labels to noisy labels +# train set and val set split +def dataset_split(train_images, train_labels, + dataset='mnist', noise_type='symmetric', noise_rate=0.5, + split_per=0.9, random_seed=1, num_classes=10 + ): + + clean_train_labels = train_labels[:, np.newaxis] + + if noise_type == 'symmetric': + noisy_labels, real_noise_rate, transition_matrix = noisify_multiclass_symmetric(clean_train_labels, + noise=noise_rate, + random_state=random_seed, + nb_classes=num_classes) + if noise_type == 'pairflip': + noisy_labels, real_noise_rate, transition_matrix = noisify_pairflip(clean_train_labels, + noise=noise_rate, + random_state=random_seed, + nb_classes=num_classes) + if noise_type == 'asymmetric' and dataset == 'mnist': + noisy_labels, real_noise_rate, transition_matrix = noisify_multiclass_asymmetric_mnist(clean_train_labels, + noise=noise_rate, + random_state=random_seed, + nb_classes=num_classes) + + if noise_type == 'asymmetric' and dataset == 'cifar10': + noisy_labels, real_noise_rate, transition_matrix = noisify_multiclass_asymmetric_cifar10(clean_train_labels, + noise=noise_rate, + random_state=random_seed, + nb_classes=num_classes) + + if noise_type == 'asymmetric' and dataset == 'cifar100': + noisy_labels, real_noise_rate, transition_matrix = noisify_multiclass_asymmetric_cifar100(clean_train_labels, + noise=noise_rate, + random_state=random_seed, + nb_classes=num_classes) + + if noise_type == 'instance' and dataset == 'mnist': + noisy_labels = get_instance_noisy_label(noise_rate, + images=train_images, + labels=train_labels, + num_classes=10, + feature_size=784, + norm_std=0.1, + seed=random_seed) + + if noise_type == 'instance' and dataset == 'cifar10': + noisy_labels = get_instance_noisy_label(noise_rate, + images=train_images, + labels=train_labels, + num_classes=10, + feature_size=3072, + norm_std=0.1, + seed=random_seed) + + if noise_type == 'instance' and dataset == 'cifar100': + noisy_labels = get_instance_noisy_label(noise_rate, + images=train_images, + labels=train_labels, + num_classes=100, + feature_size=3072, + norm_std=0.1, + 
seed=random_seed)
+
+    noisy_labels = noisy_labels.squeeze()
+    num_samples = int(noisy_labels.shape[0])
+    np.random.seed(random_seed)
+    train_set_index = np.random.choice(num_samples, int(num_samples*split_per), replace=False)
+    index = np.arange(train_images.shape[0])
+    val_set_index = np.delete(index, train_set_index)
+
+    train_set, val_set = train_images[train_set_index, :], train_images[val_set_index, :]
+    train_labels, val_labels = noisy_labels[train_set_index], noisy_labels[val_set_index]
+
+    noise_mask = np.transpose(clean_train_labels[train_set_index]) == np.transpose(train_labels)
+
+    return train_set, val_set, train_labels, val_labels, noise_mask
+
+
+def dataset_split_without_noise(train_images, train_labels, split_per=0.9, random_seed=1):
+    total_labels = train_labels[:, np.newaxis]
+    num_samples = int(total_labels.shape[0])
+    np.random.seed(random_seed)
+    train_set_index = np.random.choice(num_samples, int(num_samples * split_per), replace=False)
+    index = np.arange(train_images.shape[0])
+    val_set_index = np.delete(index, train_set_index)
+    train_set, val_set = train_images[train_set_index], train_images[val_set_index]
+    train_labels, val_labels = total_labels[train_set_index], total_labels[val_set_index]
+
+    noise_mask = np.full(len(train_labels), True)
+
+    return train_set, val_set, train_labels.squeeze(), val_labels.squeeze(), noise_mask
+
+
+def transform_target(label):
+    label = np.array(label)
+    target = torch.from_numpy(label).long()
+    return target
+
+
+def get_instance_noisy_label(noise_rate, images, labels, num_classes, feature_size, norm_std, seed):
+    """
+    images: mnist, cifar10, cifar100
+    label_num: class number
+    feature_size: the size of input images (e.g. 28*28)
+    norm_std: default 0.1
+    """
+
+    label_num = num_classes
+    np.random.seed(int(seed))
+    torch.manual_seed(int(seed))
+    torch.cuda.manual_seed(int(seed))
+
+    P = []
+    flip_distribution = stats.truncnorm((0 - noise_rate) / norm_std, (1 - noise_rate) / norm_std, loc=noise_rate, scale=norm_std)
+    # flip_distribution = stats.beta(a=0.01, b=(0.01 / n) - 0.01, loc=0, scale=1)
+    flip_rate = flip_distribution.rvs(labels.shape[0])
+
+    images = torch.Tensor(images).to(torch.float)
+    if not torch.is_tensor(labels):
+        labels = torch.FloatTensor(labels)
+    labels = labels.to(DEVICE)
+
+    W = np.random.randn(label_num, feature_size, label_num)
+    W = torch.FloatTensor(W).to(DEVICE)
+
+    for i, (x, y) in enumerate(zip(images, labels)):
+        x = x.to(DEVICE)
+        y = int(y)
+        A = x.view(1, -1).mm(W[y]).squeeze(0)
+        A[y] = -np.inf
+        A = flip_rate[i] * F.softmax(A, dim=0)
+        A[y] += 1 - flip_rate[i]
+        P.append(A)
+    P = torch.stack(P, 0).cpu().numpy()
+
+    l = [i for i in range(label_num)]
+    new_label = [np.random.choice(l, p=P[i]) for i in range(labels.shape[0])]
+    return np.array(new_label)
diff --git a/deep-learning/food.py b/deep-learning/food.py
new file mode 100644
index 00000000..9ac54327
--- /dev/null
+++ b/deep-learning/food.py
@@ -0,0 +1,280 @@
+import os
+import argparse
+import datetime
+import time
+
+import numpy as np
+
+import torch
+from torch.optim.lr_scheduler import MultiStepLR
+from torchvision.datasets import Food101
+from torchvision import transforms
+from torch.utils.data.sampler import SubsetRandomSampler
+
+import methods
+import utils
+from models.resnet50 import resnet50
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--result_dir', type=str, help = 'dir to save result txt files', default='results/')
+parser.add_argument('--root_dir', type=str, help = 'dir that stores dataset',
default='data/') +parser.add_argument('--lr_init', type=float, default=0.001) +parser.add_argument('--n_epoch', type=int, default=100) +parser.add_argument('--batch_size', type=int, help='batch_size', default=32) +parser.add_argument('--wd', type=float, help='l2 regularization', default=1e-4) +parser.add_argument('--method', type=str, help='[regular, rlvi, coteaching, jocor, cdr, usdnl, bare]', default='regular') +parser.add_argument('--noise_rate', type=float, help='corruption level, should be less than 1', default=0.1) +parser.add_argument('--split_percentage', type=float, help='train and validation', default=0.9) +parser.add_argument('--print_freq', type=int, default=1) +parser.add_argument('--num_workers', type=int, default=4, help='how many subprocesses to use for data loading') +parser.add_argument('--seed', type=int, default=1) +# For alternative methods +parser.add_argument('--forget_rate', type=float, help='forget rate', default=None) +parser.add_argument('--num_gradual', type=int, default=10, help='how many epochs for linear drop rate, can be 5, 10, 15. This parameter is equal to Tk for R(T) in Co-teaching paper.') +parser.add_argument('--exponent', type=float, default=1, help='exponent of the forget rate, can be 0.5, 1, 2. This parameter is equal to c in Tc for R(T) in Co-teaching paper.') + +args = parser.parse_args() + +DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") +# DEVICE = 'cpu' + +# Seed +np.random.seed(args.seed) +torch.manual_seed(args.seed) +if DEVICE == "cuda": + torch.cuda.manual_seed(args.seed) + + +# Load datasets for training, validation, and testing +transform_train = transforms.Compose([ + transforms.Resize(224), + transforms.CenterCrop(224), + transforms.RandomHorizontalFlip(), + transforms.RandomVerticalFlip(), + transforms.RandomRotation(45), + transforms.RandomAffine(45), + transforms.ColorJitter(), + transforms.ToTensor(), + transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) + ]) + +transform_test = transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) + ]) + + +class Food101Dataset(torch.utils.data.Dataset): + def __init__(self, root, train=True): + if train: + split = 'train' + transform = transform_train + else: + split = 'test' + transform = transform_test + + self.dataset = Food101(root=root, + download=True, + split=split, + transform=transform) + + def __getitem__(self, index): + data, target = self.dataset[index] + return data, target, index + + def __len__(self): + return len(self.dataset) + +train_dataset = Food101Dataset(root=args.root_dir, train=True) +test_dataset = Food101Dataset(root=args.root_dir, train=False) + + +# For alternative methods: +# create rate_schedule to gradually consider less and less samples +if args.forget_rate is None: + forget_rate = args.noise_rate +else: + forget_rate = args.forget_rate +rate_schedule = np.ones(args.n_epoch) * forget_rate +rate_schedule[:args.num_gradual] = np.linspace(0, forget_rate**args.exponent, args.num_gradual) + + +# Prepare a structured output +save_dir = f"{args.result_dir}/food101/{args.method}" +if not os.path.exists(save_dir): + os.system('mkdir -p %s' % save_dir) + +model_str = f"food101_{args.method}" +txtfile = f"{save_dir}/{model_str}-s{args.seed}.txt" +if os.path.exists(txtfile): + curr_time = datetime.datetime.now().strftime('%Y-%m-%d-%H:%M:%S') + new_dest = f"{txtfile}.bak-{curr_time}" + os.system(f"mv {txtfile} {new_dest}") + + 
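As an aside on the schedule built just above: it ramps the forget rate linearly from 0 over the first `num_gradual` epochs and then holds it constant, so the sample-discarding baselines start gently. A small standalone illustration (the numbers here are picked for readability, not the script's defaults):
```
# Illustration of the forget-rate schedule used by coteaching/jocor/cdr/usdnl.
import numpy as np

n_epoch, num_gradual, exponent, forget_rate = 8, 4, 1.0, 0.1
rate_schedule = np.ones(n_epoch) * forget_rate
rate_schedule[:num_gradual] = np.linspace(0, forget_rate**exponent, num_gradual)
print(rate_schedule)   # 0, 0.033, 0.067, then 0.1 for every remaining epoch (rounded)
```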
+def run(): + # Data Loaders + indices = np.arange(len(train_dataset)) + np.random.shuffle(indices) + split = int(np.floor(args.split_percentage * len(train_dataset))) + train_indices, val_indices = indices[:split], indices[split:] + train_sampler = SubsetRandomSampler(train_indices) + val_sampler = SubsetRandomSampler(val_indices) + + train_loader = torch.utils.data.DataLoader(dataset=train_dataset, + batch_size=args.batch_size, + sampler=train_sampler, + num_workers=args.num_workers, + drop_last=False, + pin_memory=True) + + val_loader = torch.utils.data.DataLoader(dataset=train_dataset, + batch_size=args.batch_size, + sampler=val_sampler, + num_workers=args.num_workers, + drop_last=False, + pin_memory=True) + + test_loader = torch.utils.data.DataLoader(dataset=test_dataset, + batch_size=args.batch_size, + num_workers=args.num_workers, + drop_last=False, + pin_memory=True) + + # Prepare models and optimizers + if args.method == 'usdnl': + model = resnet50(input_channel=3, num_classes=101, pretrained=True, dropout_rate=0.25) + else: + model = resnet50(input_channel=3, num_classes=101, pretrained=True) + model.to(DEVICE) + optimizer = torch.optim.Adam( + params=[ + {"params": model.fc_params(), "lr": args.lr_init}, + {"params": model.backbone_params()} + ], + lr=1e-4, weight_decay=args.wd + ) + + if args.method == 'jocor' or args.method == 'coteaching': + model_sec = resnet50(input_channel=3, num_classes=101, pretrained=True) + model_sec.to(DEVICE) + if args.method == 'jocor': + optimizer = torch.optim.Adam( + params=[ + {"params": list(model.fc_params()) + list(model_sec.fc_params()), "lr": args.lr_init}, + {"params": list(model.backbone_params()) + list(model_sec.backbone_params())} + ], + lr=1e-4, weight_decay=args.wd + ) + else: + optimizer_sec = torch.optim.Adam( + params=[ + {"params": model_sec.fc_params(), "lr": args.lr_init}, + {"params": model_sec.backbone_params()} + ], + lr=1e-4, weight_decay=args.wd + ) + + # Schedule for the learning rate + scheduler = MultiStepLR(optimizer, milestones=[10, 20, 30], gamma=0.2) + if args.method == 'coteaching': + scheduler_sec = MultiStepLR(optimizer_sec, milestones=[10, 20, 30], gamma=0.2) + + if args.method == 'rlvi': + sample_weights = torch.ones(len(train_dataset)).to(DEVICE) + residuals = torch.zeros_like(sample_weights).to(DEVICE) + overfit = False + threshold = 0 + + + # Evaluate init model + test_acc = utils.evaluate(test_loader, model) + utils.output_table(epoch=0, n_epoch=args.n_epoch, test_acc=test_acc) + with open(txtfile, "a") as myfile: + myfile.write("epoch:\ttime_ep\ttau\tfix\ttrain_acc\tval_acc\ttest_acc\n") + myfile.write(f"0:\t0\t0\t{False}\t0\t0\t{test_acc:8.4f}\n") + + # Training + for epoch in range(1, args.n_epoch): + model.train() + + time_ep = time.time() + + #### Start one epoch of training with selected method #### + + if args.method == "regular": + train_acc = methods.train_regular(train_loader, model, optimizer) + val_acc = utils.evaluate(val_loader, model) + + elif args.method == "rlvi": + train_acc, threshold = methods.train_rlvi( + train_loader, model, optimizer, + residuals, sample_weights, overfit, threshold + ) + val_acc = utils.evaluate(val_loader, model) + # Using no regularization by default (as val. 
score is increasing) + overfit = False + + elif args.method == 'coteaching': + model_sec.train() + train_acc = methods.train_coteaching( + train_loader, epoch, + model, optimizer, model_sec, optimizer_sec, + rate_schedule + ) + val_acc = utils.evaluate(val_loader, model) + scheduler_sec.step() + + elif args.method == 'jocor': + model_sec.train() + train_acc = methods.train_jocor( + train_loader, epoch, + model, model_sec, optimizer, + rate_schedule + ) + val_acc = utils.evaluate(val_loader, model) + + elif args.method == 'cdr': + train_acc = methods.train_cdr(train_loader, epoch, model, optimizer, rate_schedule) + val_acc = utils.evaluate(val_loader, model) + + elif args.method == 'usdnl': + train_acc = methods.train_usdnl(train_loader, epoch, model, optimizer, rate_schedule) + val_acc = utils.evaluate(val_loader, model) + + elif args.method == 'bare': + train_acc = methods.train_bare(train_loader, model, optimizer, num_classes=101) + val_acc = utils.evaluate(val_loader, model) + + + # Update LR + scheduler.step() + + #### Finish one epoch of training with selected method #### + + # Log info + time_ep = time.time() - time_ep + test_acc = utils.evaluate(test_loader, model) + + # Print log-table + if (epoch + 1) % args.print_freq == 0: + utils.output_table(epoch, args.n_epoch, time_ep, train_acc=train_acc, test_acc=test_acc) + else: + utils.output_table(epoch, args.n_epoch, time_ep, train_acc=train_acc) + + # Prepare output: put dummy values for alternative methods + if args.method != 'rlvi': + overfit = False + threshold = 0 + + # Save logs to the file + with open(txtfile, "a") as myfile: + myfile.write(f"{int(epoch)}:\t{time_ep:.2f}\t{threshold:.2f}\t{overfit}\t" + + f"{train_acc:8.4f}\t{val_acc:8.4f}\t{test_acc:8.4f}\n") + + +if __name__ == '__main__': + run() diff --git a/deep-learning/main.py b/deep-learning/main.py new file mode 100644 index 00000000..b0f05f39 --- /dev/null +++ b/deep-learning/main.py @@ -0,0 +1,354 @@ +import os +import argparse +import datetime +import time + +import numpy as np +import torch +from torch.optim.lr_scheduler import MultiplicativeLR, CosineAnnealingLR + +import methods +import data_load +import data_tools +import utils + +from models.lenet import LeNet +from models.resnet import ResNet18, ResNet34 +# models with dropout for USDNL algorithm: +from models.lenet import LeNetDO +from models.resnet import ResNet18DO, ResNet34DO + + +parser = argparse.ArgumentParser() +parser.add_argument('--result_dir', type=str, help = 'dir to save result txt files', default='results/') +parser.add_argument('--root_dir', type=str, help = 'dir that stores datasets', default='data/') +parser.add_argument('--dataset', type=str, help='[mnist, cifar10, cifar100]', default='mnist') +parser.add_argument('--method', type=str, help='[regular, rlvi, coteaching, jocor, cdr, usdnl, bare]', default='rlvi') +parser.add_argument('--noise_rate', type=float, help='corruption rate, should be less than 1', default=0.45) +parser.add_argument('--noise_type', type=str, help='[pairflip, symmetric, asymmetric, instance]', default='pairflip') +parser.add_argument('--split_percentage', type=float, help='train and validation', default=0.9) +parser.add_argument('--momentum', type=int, help='momentum', default=0.9) +parser.add_argument('--print_freq', type=int, default=1) +parser.add_argument('--num_workers', type=int, default=4, help='number of subprocesses for data loading') +parser.add_argument('--seed', type=int, default=1) +# For alternative methods +parser.add_argument('--forget_rate', 
type=float, help='forget rate', default=None) +parser.add_argument('--num_gradual', type=int, default=10, help='how many epochs for linear drop rate, can be 5, 10, 15. This parameter is equal to Tk for R(T) in Co-teaching paper.') +parser.add_argument('--exponent', type=float, default=1, help='exponent of the forget rate, can be 0.5, 1, 2. This parameter is equal to c in Tc for R(T) in Co-teaching paper.') + +args = parser.parse_args() + +DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") +# DEVICE = 'cpu' + +# Seed +np.random.seed(args.seed) +torch.manual_seed(args.seed) +if DEVICE == "cuda": + torch.cuda.manual_seed(args.seed) + + +# Load datasets for training, validation, and testing +if args.dataset == 'mnist': + input_channel = 1 + num_classes = 10 + args.n_epoch = 100 + args.batch_size = 32 + args.wd = 1e-3 + args.lr_init = 0.01 + + if args.method != 'usdnl': + Model = LeNet + else: + Model = LeNetDO # with dropout + + train_dataset = data_load.Mnist(root=args.root_dir, + download=True, + train=True, + transform=Model.transform_train, + target_transform=data_tools.transform_target, + dataset=args.dataset, + noise_type=args.noise_type, + noise_rate=args.noise_rate, + split_per=args.split_percentage, + random_seed=args.seed) + + val_dataset = data_load.Mnist(root=args.root_dir, + train=False, + transform=Model.transform_test, + target_transform=data_tools.transform_target, + dataset=args.dataset, + noise_type=args.noise_type, + noise_rate=args.noise_rate, + split_per=args.split_percentage, + random_seed=args.seed) + + + test_dataset = data_load.MnistTest(root=args.root_dir, + transform=Model.transform_test, + target_transform=data_tools.transform_target) + + +if args.dataset == 'cifar10': + input_channel = 3 + num_classes = 10 + args.n_epoch = 200 + args.batch_size = 128 + args.wd = 5e-4 + args.lr_init = 0.01 + + if args.method != 'usdnl': + Model = ResNet18 + else: + Model = ResNet18DO # with dropout + + train_dataset = data_load.Cifar10(root=args.root_dir, + download=True, + train=True, + transform=Model.transform_train, + target_transform=data_tools.transform_target, + dataset=args.dataset, + noise_type=args.noise_type, + noise_rate=args.noise_rate, + split_per=args.split_percentage, + random_seed=args.seed) + + val_dataset = data_load.Cifar10(root=args.root_dir, + train=False, + transform=Model.transform_test, + target_transform=data_tools.transform_target, + dataset=args.dataset, + noise_type=args.noise_type, + noise_rate=args.noise_rate, + split_per=args.split_percentage, + random_seed=args.seed) + + + test_dataset = data_load.Cifar10Test(root=args.root_dir, + transform=Model.transform_test, + target_transform=data_tools.transform_target) + + +if args.dataset == 'cifar100': + input_channel = 3 + num_classes = 100 + args.n_epoch = 200 + args.batch_size = 128 + args.wd = 5e-4 + args.lr_init = 0.01 + + if args.method != 'usdnl': + Model = ResNet34 + else: + Model = ResNet34DO # with dropout + + train_dataset = data_load.Cifar100(root=args.root_dir, + download=True, + train=True, + transform=Model.transform_train, + target_transform=data_tools.transform_target, + dataset=args.dataset, + noise_type=args.noise_type, + noise_rate=args.noise_rate, + split_per=args.split_percentage, + random_seed=args.seed) + + val_dataset = data_load.Cifar100(root=args.root_dir, + train=False, + transform=Model.transform_test, + target_transform=data_tools.transform_target, + dataset=args.dataset, + noise_type=args.noise_type, + noise_rate=args.noise_rate, + 
split_per=args.split_percentage, + random_seed=args.seed) + + test_dataset = data_load.Cifar100Test(root=args.root_dir, + transform=Model.transform_test, + target_transform=data_tools.transform_target) + + +# For alternative methods: +# create rate_schedule to gradually consider less and less samples +if args.forget_rate is None: + forget_rate = args.noise_rate + if args.noise_type == 'asymmetric': + forget_rate /= 2. +else: + forget_rate = args.forget_rate +rate_schedule = np.ones(args.n_epoch) * forget_rate +rate_schedule[:args.num_gradual] = np.linspace(0, forget_rate**args.exponent, args.num_gradual) + + +# Prepare a structured output +save_dir = f"{args.result_dir}/{args.dataset}/{args.method}" +if not os.path.exists(save_dir): + os.system('mkdir -p %s' % save_dir) + +model_str = f"{args.dataset}_{args.method}_{args.noise_type}_{args.noise_rate}" +txtfile = f"{save_dir}/{model_str}-s{args.seed}.txt" +if os.path.exists(txtfile): + curr_time = datetime.datetime.now().strftime('%Y-%m-%d-%H:%M:%S') + new_dest = f"{txtfile}.bak-{curr_time}" + os.system(f"mv {txtfile} {new_dest}") + + +def run(): + # Data Loaders + train_loader = torch.utils.data.DataLoader(dataset=train_dataset, + batch_size=args.batch_size, + num_workers=args.num_workers, + drop_last=False, + shuffle=True, + pin_memory=True) + + val_loader = torch.utils.data.DataLoader(dataset=val_dataset, + batch_size=args.batch_size, + num_workers=args.num_workers, + drop_last=False, + shuffle=False, + pin_memory=True) + + + test_loader = torch.utils.data.DataLoader(dataset=test_dataset, + batch_size=args.batch_size, + num_workers=args.num_workers, + drop_last=False, + shuffle=False, + pin_memory=True) + + # Prepare models and optimizers + model = Model(input_channel=input_channel, num_classes=num_classes) + model.to(DEVICE) + optimizer = torch.optim.SGD(model.parameters(), lr=args.lr_init, weight_decay=args.wd, momentum=args.momentum) + + if args.method == 'jocor': + model_sec = Model(input_channel=input_channel, num_classes=num_classes) + model_sec.to(DEVICE) + optimizer = torch.optim.SGD( + list(model.parameters()) + list(model_sec.parameters()), + lr=args.lr_init, weight_decay=args.wd, momentum=args.momentum + ) + + # Use Multipliactive LR decay for MNIST and Cosine Annealing for CIFAR10/100 + if args.dataset == 'mnist': + scheduler = MultiplicativeLR(optimizer, utils.get_lr_factor) + else: + scheduler = CosineAnnealingLR(optimizer, T_max=200) + + if args.method == 'coteaching': + model_sec = Model(input_channel=input_channel, num_classes=num_classes) + model_sec.to(DEVICE) + optimizer_sec = torch.optim.SGD(model_sec.parameters(), lr=args.lr_init, weight_decay=args.wd, momentum=args.momentum) + if args.dataset == 'mnist': + scheduler_sec = MultiplicativeLR(optimizer_sec, utils.get_lr_factor) + else: + scheduler_sec = CosineAnnealingLR(optimizer_sec, T_max=200) + + + if args.method == 'rlvi': + sample_weights = torch.ones(len(train_dataset)).to(DEVICE) + residuals = torch.zeros_like(sample_weights).to(DEVICE) + overfit = False + threshold = 0 + val_acc_old, val_acc_old_old = 0, 0 + + + # Evaluate init model + test_acc = utils.evaluate(test_loader, model) + utils.output_table(epoch=0, n_epoch=args.n_epoch, test_acc=test_acc) + with open(txtfile, "a") as myfile: + myfile.write("epoch:\ttime_ep\ttau\tfix\tclean,%\tcorr,%\ttrain_acc\tval_acc\ttest_acc\n") + myfile.write(f"0:\t0\t0\t{False}\t100\t0\t0\t0\t{test_acc:8.4f}\n") + + # Training + for epoch in range(1, args.n_epoch): + model.train() + + time_ep = time.time() + + #### Start one 
epoch of training with selected method #### + + if args.method == "regular": + train_acc = methods.train_regular(train_loader, model, optimizer) + val_acc = utils.evaluate(val_loader, model) + + elif args.method == "rlvi": + train_acc, threshold = methods.train_rlvi( + train_loader, model, optimizer, + residuals, sample_weights, overfit, threshold + ) + # Check if overfitting has started + val_acc = utils.evaluate(val_loader, model) + if not overfit: + if epoch > 2: + # Overfitting started <=> validation score is dropping + overfit = (val_acc < 0.5 * (val_acc_old + val_acc_old_old)) + val_acc_old_old = val_acc_old + val_acc_old = val_acc + + elif args.method == 'coteaching': + model_sec.train() + train_acc = methods.train_coteaching( + train_loader, epoch, + model, optimizer, model_sec, optimizer_sec, + rate_schedule + ) + val_acc = utils.evaluate(val_loader, model) + scheduler_sec.step() + + elif args.method == 'jocor': + model_sec.train() + train_acc = methods.train_jocor( + train_loader, epoch, + model, model_sec, optimizer, + rate_schedule + ) + val_acc = utils.evaluate(val_loader, model) + + elif args.method == 'cdr': + train_acc = methods.train_cdr(train_loader, epoch, model, optimizer, rate_schedule) + val_acc = utils.evaluate(val_loader, model) + + elif args.method == 'usdnl': + train_acc = methods.train_usdnl(train_loader, epoch, model, optimizer, rate_schedule) + val_acc = utils.evaluate(val_loader, model) + + elif args.method == 'bare': + train_acc = methods.train_bare(train_loader, model, optimizer, num_classes) + val_acc = utils.evaluate(val_loader, model) + + # Update LR + scheduler.step() + + #### Finish one epoch of training with selected method #### + + # Log info + time_ep = time.time() - time_ep + test_acc = utils.evaluate(test_loader, model) + + # Print log-table + if (epoch + 1) % args.print_freq == 0: + utils.output_table(epoch, args.n_epoch, time_ep, train_acc=train_acc, test_acc=test_acc) + else: + utils.output_table(epoch, args.n_epoch, time_ep, train_acc=train_acc) + + # Prepare output: put dummy values for alternative methods + if args.method != 'rlvi': + overfit = False + threshold = 0 + clean, corr = 1, 0 + # Check the number of correctly identified corrupted samples for RLVI + if (args.method == 'rlvi') and (args.noise_rate > 0): + mask = (sample_weights > threshold).cpu() + clean, corr = utils.get_ratio_corrupted(mask, train_dataset.noise_mask) + + # Save logs to the file + with open(txtfile, "a") as myfile: + myfile.write(f"{int(epoch)}:\t{time_ep:.2f}\t{threshold:.2f}\t{overfit}\t" + + f"{clean*100:.2f}\t{corr*100:.2f}\t" + + f"{train_acc:8.4f}\t{val_acc:8.4f}\t{test_acc:8.4f}\n") + + +if __name__ == '__main__': + run() diff --git a/deep-learning/methods/__init__.py b/deep-learning/methods/__init__.py new file mode 100644 index 00000000..24948695 --- /dev/null +++ b/deep-learning/methods/__init__.py @@ -0,0 +1,7 @@ +from .train_rlvi import * +from .train_regular import * +from .train_cdr import * +from .train_coteaching import * +from .train_jocor import * +from .train_usdnl import * +from .train_bare import * diff --git a/deep-learning/methods/train_bare.py b/deep-learning/methods/train_bare.py new file mode 100644 index 00000000..da3d7782 --- /dev/null +++ b/deep-learning/methods/train_bare.py @@ -0,0 +1,82 @@ +import torch +from torch import nn +from torch.nn import functional as F +from torch.autograd import Variable +from utils import accuracy + + +__all__ = ['train_bare'] + + +DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") +# 
DEVICE = 'cpu' + + +class WeightedCCE(nn.Module): + """ + Implementing BARE with Cross-Entropy (CCE) Loss + """ + + def __init__(self, k=1, num_class=10, reduction="mean"): + super(WeightedCCE, self).__init__() + + self.k = k + self.reduction = reduction + self.num_class = num_class + + + def forward(self, prediction, target_label, one_hot=True): + EPS = 1e-8 + if one_hot: + y_true = F.one_hot(target_label.type(torch.LongTensor), + num_classes=self.num_class).to(DEVICE) + y_pred = F.softmax(prediction, dim=1) + y_pred = torch.clamp(y_pred, EPS, 1-EPS) + pred_tmp = torch.sum(y_true * y_pred, axis=-1).reshape(-1, 1) + + ## Compute batch statistics + avg_post = torch.mean(y_pred, dim=0) + avg_post = avg_post.reshape(-1, 1) + std_post = torch.std(y_pred, dim=0) + std_post = std_post.reshape(-1, 1) + avg_post_ref = torch.matmul(y_true.type(torch.float), avg_post) + std_post_ref = torch.matmul(y_true.type(torch.float), std_post) + pred_prun = torch.where((pred_tmp - avg_post_ref >= self.k * std_post_ref), + pred_tmp, torch.zeros_like(pred_tmp)) + + # prun_idx will tell us which examples are + # 'trustworthy' for the given batch + prun_idx = torch.where(pred_prun != 0.)[0] + if len(prun_idx) != 0: + prun_targets = torch.argmax(torch.index_select(y_true, 0, prun_idx), dim=1) + weighted_loss = F.cross_entropy(torch.index_select(prediction, 0, prun_idx), + prun_targets, reduction=self.reduction) + else: + weighted_loss = F.cross_entropy(prediction, target_label) + + return weighted_loss + + +def train_bare(train_loader, model, optimizer, num_classes): + train_total = 0 + train_correct = 0 + + loss_fn = WeightedCCE(k=1, num_class=num_classes, reduction="none") + for (images, labels, indexes) in train_loader: + + images = Variable(images).to(DEVICE) + labels = Variable(labels).to(DEVICE) + + logits = model(images) + + prec, _ = accuracy(logits, labels, topk=(1, 5)) + train_total += 1 + train_correct += prec + + loss = loss_fn(logits, labels) + optimizer.zero_grad() + loss.mean().backward() + optimizer.step() + + train_acc = float(train_correct) / float(train_total) + return train_acc diff --git a/deep-learning/methods/train_cdr.py b/deep-learning/methods/train_cdr.py new file mode 100644 index 00000000..1b63f7c4 --- /dev/null +++ b/deep-learning/methods/train_cdr.py @@ -0,0 +1,72 @@ +import numpy as np +import torch +from torch import nn +from torch.nn import functional as F +from torch.autograd import Variable +from utils import accuracy + + +__all__ = ['train_cdr'] + + +DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") +# DEVICE = 'cpu' + + +def train_one_step(model, data, label, optimizer, criterion, nonzero_ratio, clip): + model.train() + pred = model(data) + loss = criterion(pred, label) + loss.backward() + + to_concat_g = [] + to_concat_v = [] + for name, param in model.named_parameters(): + if param.dim() in [2, 4]: + to_concat_g.append(param.grad.data.view(-1)) + to_concat_v.append(param.data.view(-1)) + all_g = torch.cat(to_concat_g) + all_v = torch.cat(to_concat_v) + metric = torch.abs(all_g * all_v) + num_params = all_v.size(0) + nz = int(nonzero_ratio * num_params) + top_values, _ = torch.topk(metric, nz) + thresh = top_values[-1] + + if str(DEVICE) == 'cuda': + dtype = torch.cuda.FloatTensor + else: + dtype = torch.FloatTensor + for name, param in model.named_parameters(): + if param.dim() in [2, 4]: + mask = (torch.abs(param.data * param.grad.data) >= thresh).type(dtype) + mask = mask * clip + param.grad.data = mask * param.grad.data + + optimizer.step() + 
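    # Descriptive note: the masking loop above keeps only gradient entries
    # whose |param * grad| lies in the top `nonzero_ratio` fraction (scaled
    # by `clip`), so the step just applied touches only those coordinates.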
optimizer.zero_grad() + acc = accuracy(pred, label, topk=(1, 5)) + + return float(acc[0]), loss + + +def train_cdr(train_loader, epoch, model, optimizer, rate_schedule): + train_total = 0 + train_correct = 0 + + clip = 1 - rate_schedule[epoch] + for (data, labels, indexes) in train_loader: + data = data.to(DEVICE) + labels = labels.to(DEVICE) + # Forward + Backward + Optimize + logits = model(data) + prec, _ = accuracy(logits, labels, topk=(1, 5)) + train_total += 1 + train_correct += prec + # Loss transfer + prec, loss = train_one_step( + model, data, labels, optimizer, nn.CrossEntropyLoss(), clip, clip + ) + + train_acc = float(train_correct) / float(train_total) + return train_acc diff --git a/deep-learning/methods/train_coteaching.py b/deep-learning/methods/train_coteaching.py new file mode 100644 index 00000000..5db23984 --- /dev/null +++ b/deep-learning/methods/train_coteaching.py @@ -0,0 +1,76 @@ +import numpy as np +import torch +from torch import nn +from torch.nn import functional as F +from torch.autograd import Variable +from utils import accuracy + + +__all__ = ['train_coteaching'] + + +DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") +# DEVICE = 'cpu' + + +# Loss function +def loss_coteaching(y_1, y_2, t, forget_rate, ind): + loss_1 = F.cross_entropy(y_1, t, reduction='none') + ind_1_sorted = np.argsort(loss_1.data.cpu()).to(DEVICE) + loss_1_sorted = loss_1[ind_1_sorted] + + loss_2 = F.cross_entropy(y_2, t, reduction='none') + ind_2_sorted = np.argsort(loss_2.data.cpu()).to(DEVICE) + loss_2_sorted = loss_2[ind_2_sorted] + + remember_rate = 1 - forget_rate + num_remember = int(remember_rate * len(loss_1_sorted)) + + ind_1_update=ind_1_sorted[:num_remember] + ind_2_update=ind_2_sorted[:num_remember] + # exchange + loss_1_update = F.cross_entropy(y_1[ind_2_update], t[ind_2_update]) + loss_2_update = F.cross_entropy(y_2[ind_1_update], t[ind_1_update]) + + return torch.sum(loss_1_update)/num_remember, torch.sum(loss_2_update)/num_remember + + +# Train the Co-teaching Model +def train_coteaching(train_loader, epoch, + model1, optimizer1, + model2, optimizer2, + rate_schedule): + train_total = 0 + train_correct = 0 + train_total2 = 0 + train_correct2 = 0 + + for (images, labels, indexes) in train_loader: + ind = indexes.cpu().numpy().transpose() + + images = Variable(images).to(DEVICE) + labels = Variable(labels).to(DEVICE) + + logits1 = model1(images) + prec1, _ = accuracy(logits1, labels, topk=(1, 5)) + train_total += 1 + train_correct += prec1 + + logits2 = model2(images) + prec2, _ = accuracy(logits2, labels, topk=(1, 5)) + train_total2 += 1 + train_correct2 += prec2 + + loss_1, loss_2 = loss_coteaching( + logits1, logits2, labels, rate_schedule[epoch], ind + ) + + optimizer1.zero_grad() + loss_1.backward() + optimizer1.step() + optimizer2.zero_grad() + loss_2.backward() + optimizer2.step() + + train_acc1 = float(train_correct) / float(train_total) + return train_acc1 \ No newline at end of file diff --git a/deep-learning/methods/train_jocor.py b/deep-learning/methods/train_jocor.py new file mode 100644 index 00000000..4227637b --- /dev/null +++ b/deep-learning/methods/train_jocor.py @@ -0,0 +1,76 @@ +# -*- coding:utf-8 -*- +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd import Variable +from utils import accuracy + + +__all__ = ['train_jocor'] + + +DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") +# DEVICE = 'cpu' + + +def kl_loss_compute(pred, soft_targets, reduce='none'): 
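    """
    KL divergence between the softmax outputs of the two networks,
    used as the co-regularization term of the JoCoR loss below.

    Note: `reduce` is only checked for truthiness, so the default
    string 'none' selects the mean-reduction branch.
    """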
+ kl = F.kl_div( + F.log_softmax(pred, dim=1), + F.softmax(soft_targets, dim=1), + reduction=reduce + ) + if reduce: + return torch.mean(torch.sum(kl, dim=1)) + else: + return torch.sum(kl, 1) + + +def loss_jocor(y_1, y_2, t, forget_rate, ind, co_lambda=0.1): + loss_pick_1 = F.cross_entropy(y_1, t, reduction="none") * (1-co_lambda) + loss_pick_2 = F.cross_entropy(y_2, t, reduction="none") * (1-co_lambda) + loss_pick = ( + (loss_pick_1 + loss_pick_2 + co_lambda * kl_loss_compute(y_1, y_2, reduce='none') + + co_lambda * kl_loss_compute(y_2, y_1, reduce='none')).cpu() + ) + ind_sorted = np.argsort(loss_pick.data) + loss_sorted = loss_pick[ind_sorted] + remember_rate = 1 - forget_rate + num_remember = int(remember_rate * len(loss_sorted)) + ind_update = ind_sorted[:num_remember] + # exchange + loss = torch.mean(loss_pick[ind_update]) + return loss + + +def train_jocor(train_loader, epoch, model1, model2, optimizer, rate_schedule): + train_total = 0 + train_correct = 0 + train_total2 = 0 + train_correct2 = 0 + + for (images, labels, indexes) in train_loader: + ind = indexes.cpu().numpy().transpose() + + images = Variable(images).to(DEVICE) + labels = Variable(labels).to(DEVICE) + + # Forward + Backward + Optimize + logits1 = model1(images) + prec1, _ = accuracy(logits1, labels, topk=(1, 5)) + train_total += 1 + train_correct += prec1 + + logits2 = model2(images) + prec2, _ = accuracy(logits2, labels, topk=(1, 5)) + train_total2 += 1 + train_correct2 += prec2 + + loss_1 = loss_jocor(logits1, logits2, labels, rate_schedule[epoch], ind) + + optimizer.zero_grad() + loss_1.backward() + optimizer.step() + + train_acc1 = float(train_correct) / float(train_total) + return train_acc1 diff --git a/deep-learning/methods/train_regular.py b/deep-learning/methods/train_regular.py new file mode 100644 index 00000000..a9631444 --- /dev/null +++ b/deep-learning/methods/train_regular.py @@ -0,0 +1,36 @@ +import torch +from torch import nn +from torch.nn import functional as F +from torch.autograd import Variable +from utils import accuracy + + +__all__ = ['train_regular'] + + +DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") +# DEVICE = 'cpu' + + +def train_regular(train_loader, model, optimizer): + train_total = 0 + train_correct = 0 + + for (images, labels, indexes) in train_loader: + + images = Variable(images).to(DEVICE) + labels = Variable(labels).to(DEVICE) + + logits = model(images) + + prec, _ = accuracy(logits, labels, topk=(1, 5)) + train_total += 1 + train_correct += prec + + loss = F.cross_entropy(logits, labels) + optimizer.zero_grad() + loss.backward() + optimizer.step() + + train_acc = float(train_correct) / float(train_total) + return train_acc \ No newline at end of file diff --git a/deep-learning/methods/train_rlvi.py b/deep-learning/methods/train_rlvi.py new file mode 100644 index 00000000..76f4bebf --- /dev/null +++ b/deep-learning/methods/train_rlvi.py @@ -0,0 +1,106 @@ +import torch +from torch.nn import functional as F +from torch.autograd import Variable +from utils import accuracy + + +__all__ = ['train_rlvi'] + + +DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") +# DEVICE = 'cpu' + + +@torch.no_grad() +def update_sample_weights(residuals, weights, tol=1e-3, maxiter=40): + """ + Optimize Bernoulli probabilities + + Parameters + ---------- + residuals : array, + shape: len(train_loader) - cross-entropy loss for each training sample. 
+ weights : array, + shape: len(train_loader) - Bernoulli probabilities pi: pi_i = probability that sample i is non-corrupted. + Updated inplace. + """ + residuals.sub_(residuals.min()) + exp_res = torch.exp(-residuals) + avg_weight = 0.95 + for _ in range(maxiter): + ratio = avg_weight / (1 - avg_weight) + new_weights = torch.div(ratio * exp_res, 1 + ratio * exp_res) + error = torch.norm(new_weights - weights) + weights[:] = new_weights + avg_weight = weights.mean() + if error < tol: + break + weights.div_(weights.max()) + + +def false_negative_criterion(weights, alpha=0.05): + '''Find threshold from the fixed probability (alpha) of type II error''' + total_positive = torch.sum(1 - weights) + beta = total_positive * alpha + sorted_weights, _ = torch.sort(weights, dim=0, descending=True) + false_negative = torch.cumsum(1 - sorted_weights, dim=0) + last_index = torch.sum(false_negative <= beta) - 1 + threshold = sorted_weights[last_index] + return threshold + + +def train_rlvi(train_loader, model, optimizer, + residuals, weights, overfit, threshold): + """ + Train one epoch: apply SGD updates using Bernoulli probabilities. + Thus, optimize variational bound of the marginal likelihood + instead of the standard neg. log-likelihood (cross-entropy for classification). + + Parameters + ---------- + residuals : array, + shape: len(train_loader) - to store cross-entropy loss on each training sample. + Shared across epochs. + weights : array, + shape: len(train_loader) - Bernoulli probabilities: pi_i = proba (in [0; 1]) that sample i is non-corrupted. + Shared across epochs. + overfit : bool - flag indicating whether overfitting has started. + threshold : float in [0; 1] - previous threshold for truncation: pi_i < threshold => pi_i = 0. + + Returns + ------- + train_acc : float - top-1 accuracy on training samples. + threshold : float in [0; 1] - updated threshold, based on type II error criterion. 
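    Notes
    -----
    The per-epoch update implemented in `update_sample_weights` above is,
    with shifted losses l_i = loss_i - min_j loss_j and
    r = pi_bar / (1 - pi_bar) for the current mean probability pi_bar,

        pi_i = r * exp(-l_i) / (1 + r * exp(-l_i)),

    iterated to a fixed point and rescaled by its maximum. When `overfit`
    is True, probabilities below the threshold returned by
    `false_negative_criterion` are truncated to zero.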
+ """ + + train_total = 0 + train_correct = 0 + + for (images, labels, indexes) in train_loader: + + images = Variable(images).to(DEVICE) + labels = Variable(labels).to(DEVICE) + + logits = model(images) + prec, _ = accuracy(logits, labels, topk=(1, 5)) + train_total += 1 + train_correct += prec + + loss = F.cross_entropy(logits, labels, reduction='none') + residuals[indexes] = loss # save the losses on the current batch + + batch_weights = weights[indexes] + loss = loss * batch_weights # modify loss with Bernoulli probabilities + loss = loss.mean() + optimizer.zero_grad() + loss.backward() + optimizer.step() + + update_sample_weights(residuals, weights) + if overfit: + # Regularization: truncate samples with high probability of corruption + threshold = max(threshold, false_negative_criterion(weights)) + weights[weights < threshold] = 0 + + train_acc = float(train_correct) / float(train_total) + return train_acc, threshold diff --git a/deep-learning/methods/train_usdnl.py b/deep-learning/methods/train_usdnl.py new file mode 100644 index 00000000..2b4034ff --- /dev/null +++ b/deep-learning/methods/train_usdnl.py @@ -0,0 +1,53 @@ +import numpy as np +import torch +from torch import nn +from torch.nn import functional as F +from torch.autograd import Variable +from utils import accuracy + + +__all__ = ['train_usdnl'] + + +DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") +# DEVICE = 'cpu' + + +def loss_fn(logits, labels, forget_rate): + loss_pick = F.cross_entropy(logits, labels.long(), reduction='none') + loss_pick = loss_pick.cpu() + ind_sorted = np.argsort(loss_pick.data) + loss_sorted = loss_pick[ind_sorted] + + remember_rate = 1 - forget_rate + num_remember = int(remember_rate * len(loss_sorted)) + ind_update = ind_sorted[:num_remember] + # Exchange + loss = torch.mean(loss_pick[ind_update]) + return loss + + +def train_usdnl(train_loader, epoch, model, optimizer, rate_schedule): + model.train() + train_total = 0 + train_correct = 0 + + for (data, labels, indexes) in train_loader: + + data = Variable(data).to(DEVICE) + labels = Variable(labels).to(DEVICE) + + # Forward + Backward + Optimize + logits = model(data) + prec, _ = accuracy(logits, labels, topk=(1, 5)) + train_total += 1 + train_correct += prec + + loss = loss_fn(logits, labels, rate_schedule[epoch]) + + optimizer.zero_grad() + loss.backward() + optimizer.step() + + train_acc = float(train_correct) / float(train_total) + return train_acc diff --git a/deep-learning/models/__init__.py b/deep-learning/models/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/deep-learning/models/lenet.py b/deep-learning/models/lenet.py new file mode 100644 index 00000000..cf9f14bf --- /dev/null +++ b/deep-learning/models/lenet.py @@ -0,0 +1,69 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from torchvision import transforms + + +class LeNet(nn.Module): + transform_train = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.1307, ),(0.3081, )), + ]) + transform_test = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.1307, ),(0.3081, )), + ]) + + def __init__(self, input_channel=3, num_classes=10, k=1): + super(LeNet, self).__init__() + self.conv1 = nn.Conv2d(input_channel, 6 * k, kernel_size=5) + self.conv2 = nn.Conv2d(6 * k, 16 * k, kernel_size=5) + self.fc1 = nn.Linear(16 * 4 * 4, 120) + self.fc2 = nn.Linear(120, 84) + self.fc3 = nn.Linear(84, num_classes) + + + def forward(self, x): + out = F.relu(self.conv1(x)) + out = F.max_pool2d(out, 
2) + out = F.relu(self.conv2(out)) + out = F.max_pool2d(out, 2) + out = out.view(out.shape[0], -1) + out = F.relu(self.fc1(out)) + out = F.relu(self.fc2(out)) + out = self.fc3(out) + return out + + +class LeNetDO(nn.Module): + transform_train = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.1307, ),(0.3081, )), + ]) + transform_test = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.1307, ),(0.3081, )), + ]) + + def __init__(self, input_channel=3, num_classes=10, k=1, dropout_rate=0.25): + super(LeNetDO, self).__init__() + self.conv1 = nn.Conv2d(input_channel, 6 * k, kernel_size=5) + self.conv2 = nn.Conv2d(6 * k, 16 * k, kernel_size=5) + self.fc1 = nn.Linear(16 * 4 * 4, 120) + self.fc2 = nn.Linear(120, 84) + self.fc3 = nn.Linear(84, num_classes) + self.dropout_rate = dropout_rate + + + def forward(self, x): + out = F.relu(self.conv1(x)) + out = F.max_pool2d(out, 2) + out = F.dropout2d(out, p=self.dropout_rate) + out = F.relu(self.conv2(out)) + out = F.max_pool2d(out, 2) + out = F.dropout2d(out, p=self.dropout_rate) + out = out.view(out.shape[0], -1) + out = F.relu(self.fc1(out)) + out = F.relu(self.fc2(out)) + out = self.fc3(out) + return out diff --git a/deep-learning/models/resnet.py b/deep-learning/models/resnet.py new file mode 100644 index 00000000..3787f857 --- /dev/null +++ b/deep-learning/models/resnet.py @@ -0,0 +1,121 @@ +""" +ResNet: resnet18 and resnet34 implementation +""" + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torchvision import transforms + + +class Linear(nn.Module): + def __init__(self, in_features, out_features): + + super(Linear, self).__init__() + self.w = nn.Parameter(torch.randn(in_features, out_features)) + + def forward(self, x): + x = x.mm(self.w) + return x + + +class BasicBlock(nn.Module): + expansion = 1 + def __init__(self, in_planes, planes, stride=1, dropout_rate=0): + super(BasicBlock, self).__init__() + + self.dropout_rate = dropout_rate + + self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + + self.shortcut = nn.Sequential() + if stride != 1 or in_planes != self.expansion*planes: + self.shortcut = nn.Sequential( + nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(self.expansion*planes) + ) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = self.bn2(self.conv2(out)) + if self.dropout_rate > 0: + out = F.dropout2d(out, p=self.dropout_rate) + out += self.shortcut(x) + out = F.relu(out) + return out + + +class ResNet(nn.Module): + transform_train = transforms.Compose([ + transforms.RandomCrop(32, padding=4), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)) + ]) + transform_test = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)) + ]) + + def __init__(self, block, num_blocks, input_channel, num_classes, dropout_rate=0): + super(ResNet, self).__init__() + self.in_planes = 64 + self.conv1 = nn.Conv2d(input_channel, 64, kernel_size=3, stride=1, padding=0, bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1, dropout_rate=dropout_rate) + self.layer2 = self._make_layer(block, 128, 
num_blocks[1], stride=2, dropout_rate=dropout_rate) + self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2, dropout_rate=dropout_rate) + self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2, dropout_rate=dropout_rate) + self.linear = nn.Linear(512*block.expansion, num_classes) + self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + + def _make_layer(self, block, planes, num_blocks, stride, dropout_rate): + strides = [stride] + [1]*(num_blocks-1) + layers = [] + for stride in strides: + layers.append(block(self.in_planes, planes, stride, dropout_rate)) + self.in_planes = planes * block.expansion + return nn.Sequential(*layers) + + def forward(self, x): + x = x.float() + out = F.relu(self.bn1(self.conv1(x))) + out = self.layer1(out) + out = self.layer2(out) + out = self.layer3(out) + out = self.layer4(out) + out = self.avgpool(out) + out = out.view(out.size(0), -1) + out = self.linear(out) + return out + + +class ResNet18(ResNet): + def __init__(self, input_channel, num_classes): + super(ResNet18, self).__init__( + block=BasicBlock, num_blocks=[2,2,2,2], input_channel=input_channel, num_classes=num_classes + ) + +class ResNet34(ResNet): + def __init__(self, input_channel, num_classes): + super(ResNet34, self).__init__( + block=BasicBlock, num_blocks=[3,4,6,3], input_channel=input_channel, num_classes=num_classes + ) + + +class ResNet18DO(ResNet): + def __init__(self, input_channel, num_classes, dropout_rate=0.25): + super(ResNet18DO, self).__init__( + block=BasicBlock, num_blocks=[2,2,2,2], input_channel=input_channel, num_classes=num_classes, + dropout_rate=dropout_rate + ) + +class ResNet34DO(ResNet): + def __init__(self, input_channel, num_classes, dropout_rate=0.25): + super(ResNet34DO, self).__init__( + block=BasicBlock, num_blocks=[3,4,6,3], input_channel=input_channel, num_classes=num_classes, + dropout_rate=dropout_rate + ) diff --git a/deep-learning/models/resnet50.py b/deep-learning/models/resnet50.py new file mode 100644 index 00000000..c1943003 --- /dev/null +++ b/deep-learning/models/resnet50.py @@ -0,0 +1,142 @@ +""" +ResNet: resnet50, pre-trained on ImageNet +""" + +import math +import torch.nn as nn +import torch.utils.model_zoo as model_zoo + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, outplanes, stride=1, downsample=None, dropout_rate=0): + super().__init__() + + self.dropout_rate = dropout_rate + + self.conv1 = nn.Conv2d(inplanes, outplanes, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(outplanes) + + self.conv2 = nn.Conv2d(outplanes, outplanes, kernel_size=3, stride=stride, padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(outplanes) + + self.conv3 = nn.Conv2d(outplanes, outplanes*self.expansion, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(outplanes*self.expansion) + + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + if self.dropout_rate > 0: + out = nn.functional.dropout2d(out, p=self.dropout_rate) + out = self.conv3(out) + out = self.bn3(out) + if(self.downsample is not None): + residual = self.downsample(x) + out += residual + out = self.relu(out) + + return out + + + +class ResNet(nn.Module): + + def __init__(self, block, layer, num_classes, input_channel, dropout_rate=0): + super().__init__() + self.inplanes = 64 + self.conv1 = nn.Conv2d(input_channel, 64, kernel_size=7, 
stride=2, padding=3, bias = False) + self.bn1 = nn.BatchNorm2d(64) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + + self.layer1 = self.make_layer(block, 64, layer[0], dropout_rate=dropout_rate) + self.layer2 = self.make_layer(block, 128, layer[1], stride=2, dropout_rate=dropout_rate) + self.layer3 = self.make_layer(block, 256, layer[2], stride=2, dropout_rate=dropout_rate) + self.layer4 = self.make_layer(block, 512, layer[3], stride=2, dropout_rate=dropout_rate) + + self.avgpool = nn.AvgPool2d(7, stride = 2) + + self.dropout = nn.Dropout2d(p = 0.5, inplace = True) + + self.fc = nn.Linear(512*block.expansion, num_classes) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2./n)) + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + + def make_layer(self, block, planes, blocks, stride=1, dropout_rate=0): + downsample = None + + if(stride !=1 or self.inplanes != planes * block.expansion): + downsample = nn.Sequential( + nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(planes*block.expansion)) + + layers = [] + + layers.append(block(self.inplanes, planes, stride, downsample, dropout_rate=dropout_rate)) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(self.inplanes, planes)) + + + return nn.Sequential(*layers) + + def fc_params(self): + params = [] + for name, param in self.named_parameters(): + if 'fc' in name: + params.append(param) + return params + + def backbone_params(self): + params = [] + for name, param in self.named_parameters(): + if 'fc' not in name: + params.append(param) + return params + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + x = self.avgpool(x) + x = self.dropout(x) + x = x.view(x.size(0), -1) + x = self.fc(x) + return x + + +def resnet50(num_classes, input_channel, pretrained=True, dropout_rate=0): + model = ResNet(Bottleneck, [3, 4, 6, 3], num_classes, input_channel, dropout_rate=dropout_rate) + + if pretrained == True: + state_dict = model_zoo.load_url( + 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-rsb-weights/resnet50_a1h2_176-001a1197.pth', + map_location='cpu' + ) + state_dict.pop('fc.weight', None) + state_dict.pop('fc.bias', None) + model.load_state_dict(state_dict, strict=False) + print("Loaded Imagenet pretrained model") + + return model diff --git a/deep-learning/utils.py b/deep-learning/utils.py new file mode 100644 index 00000000..a007817e --- /dev/null +++ b/deep-learning/utils.py @@ -0,0 +1,92 @@ +import tabulate +import numpy as np + +import torch +from torch.nn import functional as F +from torch.autograd import Variable + + +DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") +# DEVICE = 'cpu' + + +# Multiplicative LR schedule for SGD +def get_lr_factor(epoch): + """ + Compute multiplicative factor to decay learning rate. 
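    The factor is 1.0 for the first 20% of epochs, decays linearly
    to lr_ratio = 0.01 between 20% and 40%, and stays at 0.01 afterwards.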
+ To use for MNIST within 100 epochs + """ + t = epoch / 100 + lr_ratio = 0.01 + if t <= 0.2: + factor = 1.0 + elif t <= 0.4: + factor = 1.0 - (1.0 - lr_ratio) * (t - 0.2) / 0.2 + else: + factor = lr_ratio + return factor + + +# Print log table +def output_table(epoch, n_epoch, time_ep=None, train_acc=None, test_acc=None): + columns = ['ep', 'time_ep', 'tr_acc', 'te_acc'] + + values = [f"{epoch}/{n_epoch}", time_ep, train_acc, test_acc] + table = tabulate.tabulate( + [values], columns, tablefmt='simple', + stralign='center', numalign='center', floatfmt=f'8.2f' + ) + if epoch % 40 == 0: + table = table.split('\n') + table = '\n'.join([table[1]] + table) + else: + table = table.split('\n')[2] + print(table) + + +# Evaluate a single model +def evaluate(test_loader, model): + model.eval() + correct = 0 + total = 0 + with torch.no_grad(): + for images, labels, _ in test_loader: + images = Variable(images).to(DEVICE) + labels = Variable(labels).to(DEVICE) + logits = model(images) + outputs = F.softmax(logits, dim=1) + _, pred = torch.max(outputs.data, 1) + total += labels.size(0) + correct += (pred == labels).sum() + acc = 100 * float(correct) / float(total) + return acc + + +def accuracy(logit, target, topk=(1,)): + """Computes the precision@k for the specified values of k""" + output = F.softmax(logit, dim=1) + maxk = max(topk) + batch_size = target.size(0) + + _, pred = output.topk(maxk, 1, True, True) + pred = pred.t() + correct = pred.eq(target.view(1, -1).expand_as(pred)) + + res = [] + for k in topk: + correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) + res.append(correct_k.mul_(100.0 / batch_size)) + return res + + +# Ratio of identified corrupted vs clean for debug +def get_ratio_corrupted(mask, noise_mask): + clean_pred =(mask == True) + clean_true = noise_mask + clean_found = np.logical_and(clean_pred, clean_true) + clean_found_ratio = np.count_nonzero(clean_found) / np.count_nonzero(clean_true) + corrupted_pred = (mask == False) + corrupted_true = (noise_mask == False) + corrupted_found = np.logical_and(corrupted_pred, corrupted_true) + corrupted_found_ratio = np.count_nonzero(corrupted_found) / np.count_nonzero(corrupted_true) + return clean_found_ratio, corrupted_found_ratio \ No newline at end of file diff --git a/img/deep-learning-synthetic.png b/img/deep-learning-synthetic.png new file mode 100644 index 00000000..f7ba5926 Binary files /dev/null and b/img/deep-learning-synthetic.png differ diff --git a/img/online-learning.png b/img/online-learning.png new file mode 100644 index 00000000..6ed42055 Binary files /dev/null and b/img/online-learning.png differ diff --git a/online-learning/humanactivity.mat b/online-learning/humanactivity.mat new file mode 100644 index 00000000..b5d808a5 Binary files /dev/null and b/online-learning/humanactivity.mat differ diff --git a/online-learning/main.py b/online-learning/main.py new file mode 100644 index 00000000..858c721f --- /dev/null +++ b/online-learning/main.py @@ -0,0 +1,348 @@ +import os +import numpy as np +from matplotlib import pyplot as plt +from matplotlib import font_manager as fm + +from scipy.optimize import minimize_scalar +from scipy.io import loadmat + +from sklearn.linear_model import SGDClassifier +from sklearn.utils import shuffle +from sklearn import preprocessing + + +random_gen = np.random.default_rng(seed=1) + +partial_fit_classifiers = { + 'SGD': SGDClassifier(loss='log_loss', random_state=1), + 'RRM': SGDClassifier(loss='log_loss', random_state=1), + 'RLVI': SGDClassifier(loss='log_loss', 
random_state=1), +} + +CLF_COLORS = { + 'SGD': '#b85460', + 'RRM': '#e69e00', + 'RLVI': '#4477aa', +} + +CLF_ALPHA = { + 'SGD': 1., + 'RRM': 1., + 'RLVI': 1., +} + +# Use an open source Roboto font from Google (download first) +if os.path.isfile("../Roboto.ttf"): + custom_font = fm.FontProperties(fname="../Roboto.ttf", size=22) +else: + custom_font = fm.FontProperties(size=22) + +# To save results +if not os.path.exists('plots'): + os.makedirs('plots') + + +def update_weights_rlvi(losses, tol=1e-3, maxiter=100): + '''Optimize Bernoulli probabilities''' + exp_loss = np.exp(-losses) + weights = 0.5 * np.ones_like(losses) + for _ in range(maxiter): + avg_weight = np.mean(weights) + ratio = avg_weight / (1 - avg_weight) + new_weights = ratio * exp_loss / (1 + ratio * exp_loss) + error = np.linalg.norm(new_weights - weights) + if error < tol: + break + weights = np.copy(new_weights) + new_weights /= np.max(new_weights) * len(new_weights) + return new_weights + + +def update_weights_rrm(residuals, eps): + '''Optimize sample weights for Robust Risk Minimization''' + res = np.copy(residuals) + t = -np.log((1 - eps) * res.shape[0]) + numeric_cutoff = 1e-16 + + def objective(xi): + phi = np.exp(-res * np.exp(-xi)) + phi[phi < numeric_cutoff] = numeric_cutoff + sum_phi = np.sum(phi) + return np.exp(xi) * (np.log(sum_phi) + t) + + opt_res = minimize_scalar(objective) + opt_xi = opt_res['x'] + opt_alpha = np.exp(opt_xi) + phi = np.exp(-res / opt_alpha) + phi[phi < numeric_cutoff] = numeric_cutoff + sum_phi = np.sum(phi) + opt_beta_over_alpha = np.log(sum_phi) - 1 + opt_weights = np.exp(-res / opt_alpha) * np.exp(-opt_beta_over_alpha - 1) + return opt_weights + + +def cross_entropy(log_proba, targets): + return -targets * log_proba - (1 - targets) * log_proba + + +def smooth_triangle(data, degree=1): + '''Moving average filter (triangle scheme)''' + triangle = np.concatenate((np.arange(degree + 1), np.arange(degree)[::-1])) + smoothed = [] + for i in range(degree, len(data) - degree * 2): + point = data[i : i + len(triangle)] * triangle + smoothed.append(np.sum(point) / np.sum(triangle)) + # Boundaries + smoothed = [smoothed[0]] * int(degree + degree / 2) + smoothed + while len(smoothed) < len(data): + smoothed.append(smoothed[-1]) + return smoothed + + +def plot_score_evolution(clf_stats, smooth_deg=1): + def plot_score(x, y, name): + '''Plot accuracy vs number of samples''' + x = np.array(x) + y = np.array(y) * 100 + y = smooth_triangle(y, smooth_deg) + plt.plot(x, y, lw=3, c=CLF_COLORS[name], alpha=CLF_ALPHA[name]) + + clf_names = list(clf_stats.keys()) + plt.figure() + for name, stats in clf_stats.items(): + # Plot accuracy evolution with #examples + score, n_examples = zip(*stats['acc_history']) + # Scale by thousands + n_examples = np.array(n_examples) / 1000. + plot_score(n_examples, score, name) + plt.gca() + + plt.xlabel(r'observed samples $\times 10^3$', font_properties=custom_font) + plt.ylabel(r'Accuracy, %', font_properties=custom_font) + plt.xlim(1, 10.8) + plt.xticks(fontsize=20) + plt.yticks(np.arange(80, 101, 5), fontsize=20) + plt.ylim(85, 100) + plt.grid(True) + plt.legend(clf_names, loc='lower left', fontsize=19) + plt.tight_layout() + plt.savefig("plots/hard_score.png", dpi=300) + plt.close() + + +def plot_tp_evolution(clf_stats, smooth_deg=1): + def plot_recall(x, y, name): + '''Plot recall for pos. 
class vs number of samples''' + x = np.array(x) + y = np.array(y) + y = smooth_triangle(y, smooth_deg) + plt.plot(x, y, lw=3, c=CLF_COLORS[name], alpha=CLF_ALPHA[name]) + + clf_names = list(clf_stats.keys()) + plt.figure() + for name, stats in clf_stats.items(): + # Plot recall evolution with #examples + recall, n_examples = zip(*stats['tp_history']) + # Scale by thousands + n_examples = np.array(n_examples) / 1000. + plot_recall(n_examples, recall, name) + plt.gca() + + plt.xlabel(r'observed samples $\times 10^3$', font_properties=custom_font) + plt.ylabel('Recall', font_properties=custom_font) + plt.xticks(fontsize=20) + plt.yticks(np.arange(0.7, 1.01, 0.1), fontsize=20) + plt.ylim(0.75, 1) + plt.xlim(1, 10.8) + plt.grid(True) + plt.legend(clf_names, loc='lower left', fontsize=19) + plt.tight_layout() + plt.savefig("plots/hard_tp.png", dpi=300) + plt.close() + + +def plot_tn_evolution(clf_stats, smooth_deg=1): + def plot_recall(x, y, name): + '''Plot recall for neg. class vs number of samples''' + x = np.array(x) + y = np.array(y) + y = smooth_triangle(y, smooth_deg) + plt.plot(x, y, lw=3, c=CLF_COLORS[name], alpha=CLF_ALPHA[name]) + + clf_names = list(clf_stats.keys()) + plt.figure() + for name, stats in clf_stats.items(): + # Plot recall evolution with #examples + recall, n_examples = zip(*stats['tn_history']) + # Scale by thousands + n_examples = np.array(n_examples) / 1000. + plot_recall(n_examples, recall, name) + plt.gca() + + plt.xlabel(r'observed samples $\times 10^3$', font_properties=custom_font) + plt.ylabel('TN rate', font_properties=custom_font) + plt.xlim(1, 10.8) + plt.xticks(fontsize=20) + plt.yticks(np.arange(0.8, 1.01, 0.05), fontsize=20) + plt.ylim(0.8, 1) + plt.grid(True) + plt.legend(clf_names, loc='lower left', fontsize=19) + plt.tight_layout() + plt.savefig("plots/hard_tn.png", dpi=300) + plt.close() + + +def plot_f1_evolution(clf_stats, smooth_deg=1): + def plot_f1(x, y, name): + '''Plot f1-score vs number of samples''' + x = np.array(x) + y = np.array(y) + y = smooth_triangle(y, smooth_deg) + plt.plot(x, y, lw=3, c=CLF_COLORS[name], alpha=CLF_ALPHA[name]) + + clf_names = list(clf_stats.keys()) + plt.figure() + for name, stats in clf_stats.items(): + # Plot recall evolution with #examples + recall, n_examples = zip(*stats['f1_history']) + # Scale by thousands + n_examples = np.array(n_examples) / 1000. 
+ plot_f1(n_examples, recall, name) + plt.gca() + + plt.xlabel(r'observed samples $\times 10^3$', font_properties=custom_font) + plt.ylabel(r'$F_1{-}$score', font_properties=custom_font) + plt.xlim(1, 10.8) + plt.xticks(fontsize=20) + plt.yticks(np.arange(0.8, 1.01, 0.05), fontsize=20) + plt.ylim(0.8, 1) + plt.grid(True) + plt.legend(clf_names, loc='lower left', fontsize=19) + plt.tight_layout() + plt.savefig("plots/hard_f1.png", dpi=300) + plt.close() + + +def read_hard_dataset(file): + data = loadmat(file) + X = data['feat'] + y = data['actid'].flatten() + y = np.array(y > 2, dtype=int) + return X, y + + +def pert(a, b, c, lmbda=4): + r = c - a + alpha = 1 + lmbda * (b - a) / r + beta = 1 + lmbda * (c - b) / r + return a + float(random_gen.beta(alpha, beta)) * r + + +def iter_minibatches(X, y, batch_size=100, test_size=0.2, noise_rate=0, dynamic=False): + size = len(X) + for idx in range(0, size, batch_size): + left = idx + right = min(idx + batch_size, size) + X_batch = X[left:right] + y_batch = y[left:right] + # Split the batch into training and test part + X_test = X_batch[:int(test_size * len(y_batch))] + y_test = y_batch[:int(test_size * len(y_batch))] + X_train = X_batch[int(test_size * len(y_batch)):] + y_train = y_batch[int(test_size * len(y_batch)):] + # Normalize features based on training data + scaler = preprocessing.StandardScaler().fit(X_train) + X_train = scaler.transform(X_train) + X_test = scaler.transform(X_test) + # Corrupt training data + if noise_rate > 0: + y_positive = np.where(y_train == 1)[0] + if dynamic: + n_corrupted = int(pert(a=0, b=min(0.1, noise_rate), c=noise_rate) * len(y_positive)) + else: + n_corrupted = int(noise_rate * len(y_positive)) + corrupted_labels = random_gen.choice(y_positive, size=n_corrupted, replace=False) + y_train[corrupted_labels] = 0 + yield (X_train, y_train), (X_test, y_test) + + +def check_fitted(clf): + return hasattr(clf, "classes_") + + +data_X, data_y = read_hard_dataset('./humanactivity.mat') +data_X, data_y = shuffle(data_X, data_y, random_state=1) + + +def main(): + clf_stats = {} + for clf_name in partial_fit_classifiers: + stats = {'n_train': 0, 'acc': 0.0, 'acc_history': [(0, 0)], + 'tp': 0, 'tn': 0, 'tp_history': [(0, 0)], 'tn_history': [(0, 0)], + 'f1': 0, 'f1_history': [(0, 0)], + } + clf_stats[clf_name] = stats + + minibatch_iterators = iter_minibatches(data_X, data_y, batch_size=200, test_size=0.5, noise_rate=0.3, dynamic=True) + # Main loop : iterate on mini-batchs of examples + for i, batch in enumerate(minibatch_iterators): + (X_batch, y_batch), (X_test, y_test) = batch + for clf_name, clf in partial_fit_classifiers.items(): + if clf_name == 'RLVI': + if (not check_fitted(clf)): + log_proba = np.log(0.5 * np.ones_like(y_batch)) + else: + log_proba = clf.predict_log_proba(X_batch)[:, 1] + residuals = cross_entropy(log_proba, y_batch) + sample_weight = update_weights_rlvi(residuals) + # update estimator with examples in the current mini-batch + clf.partial_fit(X_batch, y_batch, classes=[0, 1], sample_weight=sample_weight) + + elif clf_name == 'RRM': + if not check_fitted(clf): + log_proba = np.log(0.5 * np.ones_like(y_batch)) + else: + log_proba = clf.predict_log_proba(X_batch)[:, 1] + residuals = cross_entropy(log_proba, y_batch) + sample_weight = update_weights_rrm(residuals, eps=0.3) + # update estimator with examples in the current mini-batch + clf.partial_fit(X_batch, y_batch, classes=[0, 1], sample_weight=sample_weight) + + else: + # update estimator with examples in the current mini-batch + 
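                # (plain SGD baseline: every sample enters with equal weight)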
clf.partial_fit(X_batch, y_batch, classes=[0, 1]) + + # accumulate test accuracy stats + clf_stats[clf_name]['n_train'] += len(X_batch) + + y_pred = clf.predict(X_test) + + true_positive = np.count_nonzero(np.logical_and(y_pred == 1, y_test == 1)) + clf_stats[clf_name]['tp'] = true_positive / np.count_nonzero(y_test == 1) + tp_history = (clf_stats[clf_name]['tp'], clf_stats[clf_name]['n_train']) + clf_stats[clf_name]['tp_history'].append(tp_history) + + true_negative = np.count_nonzero(np.logical_and(y_pred == 0, y_test == 0)) + clf_stats[clf_name]['tn'] = true_negative / np.count_nonzero(y_test == 0) + tn_history = (clf_stats[clf_name]['tn'], clf_stats[clf_name]['n_train']) + clf_stats[clf_name]['tn_history'].append(tn_history) + + false_positive = np.count_nonzero(np.logical_and(y_pred == 1, y_test == 0)) + false_negative = np.count_nonzero(np.logical_and(y_pred == 0, y_test == 1)) + f1 = true_positive / (true_positive + (false_positive + false_negative) / 2.) + clf_stats[clf_name]['f1'] = f1 + f1_history = (clf_stats[clf_name]['f1'], clf_stats[clf_name]['n_train']) + clf_stats[clf_name]['f1_history'].append(f1_history) + + clf_stats[clf_name]['acc'] = clf.score(X_test, y_test) + acc_history = (clf_stats[clf_name]['acc'], clf_stats[clf_name]['n_train']) + clf_stats[clf_name]['acc_history'].append(acc_history) + + smooth_deg = 10 + plot_score_evolution(clf_stats, smooth_deg=smooth_deg) + plot_tp_evolution(clf_stats, smooth_deg=smooth_deg) + plot_tn_evolution(clf_stats, smooth_deg=smooth_deg) + plot_f1_evolution(clf_stats, smooth_deg=smooth_deg) + +if __name__ == '__main__': + main() diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..9ee2bdd8 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,7 @@ +numpy==1.24.0 +scipy==1.12.0 +scikit-learn==1.2.0 +torch==2.1.2 +torchvision==0.16.2 +matplotlib==3.7.0 +tabulate==0.9.0 diff --git a/standard-learning/huber.py b/standard-learning/huber.py new file mode 100644 index 00000000..1efb3525 --- /dev/null +++ b/standard-learning/huber.py @@ -0,0 +1,40 @@ +import numpy as np +from scipy.stats import chi2 +from scipy.linalg import lstsq + +def psi_huber(vec, c): + psi = np.zeros(len(vec)) + mask = np.abs(vec) <= c + psi[mask] = vec[mask] + psi[~mask] = c * np.sign(vec[~mask]) + return psi + + +def linear_regression(X, y, c=0.7317, sigma0=1): + n = X.shape[0] + alpha = c**2 * (1 - chi2.cdf(c**2, df=1)) + chi2.cdf(c**2, df=3) + + theta0 = np.linalg.solve(X.T @ X, X.T @ y) + + while True: + # update residual + res = y - X @ theta0 + # update pesudo residual + res_psi = sigma0 * psi_huber(res / sigma0, c) + # update scale + sigma = 1 / np.sqrt(n*2 * alpha) * np.linalg.norm(res_psi) + # (re)update the pseudo residual + res_psi = sigma * psi_huber(res / sigma, c) + # regress X on r_psi + # delta = np.linalg.solve(X.T @ X, X.T @ res_psi) + delta = lstsq(X, res_psi)[0] + # update theta + theta = theta0 + delta + # stopping criteria + if (np.linalg.norm(theta - theta0) / np.linalg.norm(theta0)) > 1e-6: + sigma0 = sigma + theta0 = theta + else: + break + + return theta diff --git a/standard-learning/main.py b/standard-learning/main.py new file mode 100644 index 00000000..1f8c7dcf --- /dev/null +++ b/standard-learning/main.py @@ -0,0 +1,585 @@ +import os +import numpy as np +from scipy.linalg import lstsq +from matplotlib import pyplot as plt +from matplotlib import font_manager as fm + +import huber +import sever +import rrm +import rlvi + +import utils + + +# For reproducibility +SEED = 0 +random_gen = 
np.random.default_rng(seed=SEED) + + +# Properties for boxplots +box_plot_props = dict( + widths=0.6, + whis=[0, 100], + meanprops={'marker': 'o', 'markersize': 8, 'markerfacecolor': 'red', 'markeredgewidth': 0}, + medianprops={'linestyle': '-', 'linewidth': 1, 'color': 'r'}, + whiskerprops={'linestyle': (0, (10, 10))}, + boxprops={'c': 'blue', 'lw': 0.8}, + showfliers=False, autorange=True, showmeans=True, +) +textbox_props = dict(boxstyle='round', facecolor='white') + +# Use an open source Roboto font from Google (download first) +if os.path.isfile("../Roboto.ttf"): + custom_font = fm.FontProperties(fname="../Roboto.ttf", size=22) +else: + custom_font = fm.FontProperties(size=22) + + +# To save results +if not os.path.exists('plots'): + os.makedirs('plots') + + +def generate_data_mean(size=100, eps=0.2): + n2 = random_gen.binomial(n=size, p=eps) # number of corrupted samples + n1 = size - n2 + + mean = np.array([200, 200]) + mean = mean.reshape((-1, 1)) + sigma = np.array([ + [400, 50], + [50, 400] + ]) + sigma_root = np.linalg.cholesky(sigma) + sample1 = mean + sigma_root @ random_gen.normal(size=(2, n1)) + + nu = 2.5 + C = 2 * (nu / (nu - 2)) * sigma + C_root = np.linalg.cholesky(C) + u = random_gen.chisquare(df=nu, size=n2) / nu + u = u[None, :] + y = C_root @ random_gen.normal(size=(2, n2)) + sample2 = mean + y / np.sqrt(u) + + sample = np.hstack([sample1, sample2]) + return sample.T + + +def generate_data_linear_regression(size=40, eps=0.2, nu=1.5): + X = -5 + 10 * random_gen.random(size=(size, 10)) + n2 = random_gen.binomial(n=size, p=eps) # number of corrupted samples + n1 = size - n2 + + theta = np.ones(X.shape[-1]) + sigma = 0.25 + y1 = X[:n1] @ theta + sigma * random_gen.normal(size=n1) + + C = 1 + u = random_gen.chisquare(df=nu, size=n2) / nu + u = u[None, :] + v = np.sqrt(C) * random_gen.normal(size=n2) + y2 = X[n1:] @ theta + v / np.sqrt(u) + + y = np.vstack([y1.reshape((-1, 1)), y2.reshape((-1, 1))]).flatten() + return X, y + + +def generate_data_logistic_regression(size=100, eps=0.05): + n2 = random_gen.binomial(n=size, p=eps) # number of corrupted samples + n1 = size - n2 + theta = np.array([-1, 1, 1]) + + mean1 = np.array([0.5, 0.5]) + mean1 = mean1.reshape((-1, 1)) + rho = 0.99 + sigma1 = np.array([ + [0.25, -0.25 * rho], + [-0.25 * rho, 0.25] + ]) + sigma_root1 = np.linalg.cholesky(sigma1) + X1 = mean1 + sigma_root1 @ random_gen.normal(size=(2, n1)) + X1 = X1.T + X1_aug = np.hstack([ + np.ones((X1.shape[0], 1)), + X1, + ]) + inverse_sigmoid = 1 + np.exp(-X1_aug @ theta) + y1 = np.ones(X1.shape[0]) + y1[inverse_sigmoid > 2] = 0 + + mean2 = np.array([0.5, 1.25]) + mean2 = mean2.reshape((-1, 1)) + sigma_root2 = np.array([ + [0.1, 0], + [0, 0.1] + ]) + X2 = mean2 + sigma_root2 @ random_gen.normal(size=(2, n2)) + X2 = X2.T + y2 = np.zeros(X2.shape[0]) + + X = np.vstack([X1, X2]) + y = np.vstack([y1.reshape((-1, 1)), y2.reshape((-1, 1))]).flatten() + return X, y + + +def generate_data_pca(size=200, eps=0.2): + n2 = random_gen.binomial(n=size, p=eps) # number of corrupted samples + n1 = size - n2 + + z1 = random_gen.normal(size=n1) + sigma = 0.25 + z2 = 2 * z1 + sigma * random_gen.normal(size=n1) + z_true = np.column_stack([z1, z2]) + + nu = 1.5 + u = random_gen.chisquare(df=nu, size=n2) / nu + u = u[None, :] + v = random_gen.normal(size=(2, n2)) + z_false = v / np.sqrt(u) + z_false = z_false.T + + sample = np.vstack([z_true, z_false]) + return sample + + +def generate_data_cov(size=50, eps=0.2): + n2 = random_gen.binomial(n=size, p=eps) # number of corrupted samples + n1 = 
size - n2 + + true_mean = np.zeros((2, 1)) + true_sigma = np.array([ + [1, 0.8], + [0.8, 1] + ]) + true_sigma_root = np.linalg.cholesky(true_sigma) + + sample1 = true_mean + true_sigma_root @ random_gen.normal(size=(2, n1)) + + nu = 1.5 + + u = random_gen.chisquare(df=nu, size=n2) / nu + u = u[None, :] + y = true_sigma_root @ random_gen.normal(size=(2, n2)) + sample2 = true_mean + y / np.sqrt(u) + + sample = np.hstack([sample1, sample2]) + return sample.T + + +def test_mean(): + print("Running test for mean estimation") + + true_mean = np.array([200, 200]) + MC_runs = 100 + eps_values = np.linspace(0, 0.4, 20) + ml_errors = [0 for _ in range(len(eps_values))] + rrm_errors = [0 for _ in range(len(eps_values))] + rlvi_errors = [0 for _ in range(len(eps_values))] + + for _ in range(MC_runs): + for i, eps in enumerate(eps_values): + samples = generate_data_mean(size=100, eps=eps) + mean_ml = np.mean(samples, axis=0) + mean_rrm = rrm.mean(samples, eps=0.4) + mean_rlvi = rlvi.mean(samples) + ml_errors[i] += np.linalg.norm(true_mean - mean_ml) / np.linalg.norm(true_mean) / MC_runs + rrm_errors[i] += np.linalg.norm(true_mean - mean_rrm) / np.linalg.norm(true_mean) / MC_runs + rlvi_errors[i] += np.linalg.norm(true_mean - mean_rlvi) / np.linalg.norm(true_mean) / MC_runs + + plt.rcParams["mathtext.fontset"] = "cm" + plt.plot(eps_values, ml_errors, lw=3, c='brown', label='ML') + plt.plot(eps_values, rrm_errors, lw=3, c='green', label='RRM') + plt.plot(eps_values, rlvi_errors, lw=3, c='blue', label='RLVI') + plt.xlabel(r"corruption level $\varepsilon$", font_properties=custom_font) + # plt.ylabel("$Average\ relative\ error$", fontsize=22) + plt.ylabel("Mean\navg. relative error", font_properties=custom_font) + plt.xticks(np.arange(0, 0.5, 0.1), fontsize=20) + plt.yticks(fontsize=20) + plt.xlim(0, 0.4) + # plt.ylim(0, 0.4) + plt.legend(fontsize=18) + plt.grid() + plt.tight_layout() + plt.savefig("plots/mean.png", dpi=300) + plt.close() + print("\tDone\n") + + +def test_mean_fixed_eps(): + print("Running test for mean estimation (fixed eps.)") + + true_mean = np.array([200, 200]) + MC_runs = 100 + + ml_errors = [] + rrm_errors = [] + rlvi_errors = [] + + for _ in range(MC_runs): + samples = generate_data_mean(size=100, eps=0.2) + mean_ml = np.mean(samples, axis=0) + mean_rrm = rrm.mean(samples, eps=0.4) + mean_rlvi = rlvi.mean(samples) + ml_errors.append(np.linalg.norm(true_mean - mean_ml) / np.linalg.norm(true_mean)) + rrm_errors.append(np.linalg.norm(true_mean - mean_rrm) / np.linalg.norm(true_mean)) + rlvi_errors.append(np.linalg.norm(true_mean - mean_rlvi) / np.linalg.norm(true_mean)) + + errors_to_plot = { + "ML": ml_errors, + "RRM": rrm_errors, + "RLVI": rlvi_errors + } + + plt.rcParams["mathtext.fontset"] = "cm" + for i, errors in enumerate(errors_to_plot.values()): + plt.boxplot(errors, positions=[i + 1], **box_plot_props) + plt.xticks(range(1, len(errors_to_plot) + 1), errors_to_plot.keys(), font_properties=custom_font) + plt.ylabel("Mean\nrelative error", font_properties=custom_font) + plt.ylim(0, 0.12) + plt.yticks(np.arange(0, 0.20, 0.05), fontsize=20) + plt.grid() + plt.tight_layout() + plt.savefig("plots/mean_boxplots.png", dpi=300) + plt.close() + print("\tDone\n") + + +def test_linear_regression(): + print("Running test for linear regression") + + true_theta = np.ones(10) + eps_values = np.linspace(0, 0.4, 20) + + ml_loss = [] + huber_loss = [] + sever_loss = [] + rrm_loss = [] + rlvi_loss = [] + + MC_runs = 100 + for eps in eps_values: + ml_loss_avg = 0 + huber_loss_avg = 0 + 
sever_loss_avg = 0 + rrm_loss_avg = 0 + rlvi_loss_avg = 0 + for _ in range(MC_runs): + X, y = generate_data_linear_regression(eps=eps, nu=2.5) + theta_ml = lstsq(X, y)[0] + theta_huber = huber.linear_regression(X, y) + theta_sever = sever.linear_regression(X, y, eps=0.4) + theta_rrm = rrm.linear_regression(X, y, eps=0.4) + theta_rlvi = rlvi.linear_regression(X, y) + + ml_loss_avg += np.linalg.norm(true_theta - theta_ml) + huber_loss_avg += np.linalg.norm(true_theta - theta_huber) + sever_loss_avg += np.linalg.norm(true_theta - theta_sever) + rrm_loss_avg += np.linalg.norm(true_theta - theta_rrm) + rlvi_loss_avg += np.linalg.norm(true_theta - theta_rlvi) + + ml_loss.append(ml_loss_avg / np.linalg.norm(true_theta) / MC_runs) + huber_loss.append(huber_loss_avg / np.linalg.norm(true_theta) / MC_runs) + sever_loss.append(sever_loss_avg / np.linalg.norm(true_theta) / MC_runs) + rrm_loss.append(rrm_loss_avg / np.linalg.norm(true_theta) / MC_runs) + rlvi_loss.append(rlvi_loss_avg / np.linalg.norm(true_theta) / MC_runs) + + plt.rcParams["mathtext.fontset"] = "cm" + plt.plot(eps_values, ml_loss, lw=3, c='brown', label='ML') + plt.plot(eps_values, huber_loss, lw=3, c='chocolate', label='Huber') + plt.plot(eps_values, sever_loss, lw=3, c='orange', label='SEVER') + plt.plot(eps_values, rrm_loss, lw=3, c='teal', label='RRM') + plt.plot(eps_values, rlvi_loss, lw=3, c='#4477aa', label='RLVI') + + plt.xlabel(r"corruption level, $\varepsilon$", font_properties=custom_font)#, labelpad=10) + plt.ylabel("LinReg\navg. relative error", font_properties=custom_font) + plt.xticks(np.arange(0, 0.5, 0.1), fontsize=20) + plt.xlim(0, 0.4) + plt.yticks(np.arange(0.010, 0.08, 0.01), fontsize=20) + plt.ylim(0.015, 0.08) + plt.legend(fontsize=18, framealpha=1) + plt.grid() + plt.tight_layout() + plt.savefig("plots/linear_regression.png", dpi=300) + plt.close() + print("\tDone\n") + + +def test_linear_regression_fixed_eps(): + print("Running test for linear regression (fixed eps.)") + + true_theta = np.ones(10) + + ml_errors = [] + huber_errors = [] + sever_errors = [] + rrm_errors = [] + rlvi_errors = [] + + MC_runs = 100 + for _ in range(MC_runs): + X, y = generate_data_linear_regression(eps=0.2, nu=2.5) + theta_ml = np.linalg.solve(X.T @ X, X.T @ y) + theta_huber = huber.linear_regression(X, y) + theta_sever = sever.linear_regression(X, y, eps=0.4) + theta_rrm = rrm.linear_regression(X, y, eps=0.4) + theta_rlvi = rlvi.linear_regression(X, y) + + ml_errors.append(np.linalg.norm(true_theta - theta_ml) / np.linalg.norm(true_theta)) + huber_errors.append(np.linalg.norm(true_theta - theta_huber) / np.linalg.norm(true_theta)) + sever_errors.append(np.linalg.norm(true_theta - theta_sever) / np.linalg.norm(true_theta)) + rrm_errors.append(np.linalg.norm(true_theta - theta_rrm) / np.linalg.norm(true_theta)) + rlvi_errors.append(np.linalg.norm(true_theta - theta_rlvi) / np.linalg.norm(true_theta)) + + errors_to_plot = { + "ML": ml_errors, + "Huber": huber_errors, + "SEVER": sever_errors, + "RRM": rrm_errors, + "RLVI": rlvi_errors + } + fig, ax = plt.subplots() + plt.rcParams["mathtext.fontset"] = "cm" + for i, errors in enumerate(errors_to_plot.values()): + plt.boxplot(errors, positions=[i + 1], **box_plot_props) + plt.xticks(range(1, len(errors_to_plot) + 1), errors_to_plot.keys(), fontsize=20) + plt.ylabel("LinReg\nrelative error", font_properties=custom_font) + plt.ylim(0, 0.15) + plt.yticks(np.arange(0, 0.16, 0.05), font_properties=custom_font) + + plt.text(0.95, 0.9, r"$\varepsilon=20\%$", + transform=ax.transAxes, 
fontsize=22, + ha='right', va='top', bbox=textbox_props) + plt.grid() + plt.tight_layout() + plt.savefig("plots/linear_regression_boxplots.png", dpi=300) + plt.close() + print("\tDone\n") + + +def test_logistic_regression(): + print("Running test for logistic regression") + + true_theta = np.array([-1, 1, 1]) + eps_values = np.linspace(0, 0.4, 20) + + ml_loss = [] + sever_loss = [] + rrm_loss = [] + rlvi_loss = [] + + MC_runs = 100 + for eps in eps_values: + ml_loss_avg = 0 + sever_loss_avg = 0 + rrm_loss_avg = 0 + rlvi_loss_avg = 0 + for _ in range(MC_runs): + X, y = generate_data_logistic_regression(eps=eps) + theta_ml, _ = utils.sklearn_log_reg(X, y) + # theta_ml, _ = utils.mm_log_reg(X, y) + theta_sever = sever.logistic_regression(X, y, eps=0.1) + theta_rrm = rrm.logistic_regression(X, y, eps=0.4) + theta_rlvi = rlvi.logistic_regression(X, y) + + ml_loss_avg += np.arccos( + theta_ml @ true_theta / (np.linalg.norm(theta_ml) * np.linalg.norm(true_theta)) + ) * 180 / np.pi + sever_loss_avg += np.arccos( + theta_sever @ true_theta / (np.linalg.norm(theta_sever) * np.linalg.norm(true_theta)) + ) * 180 / np.pi + rrm_loss_avg += np.arccos( + theta_rrm @ true_theta / (np.linalg.norm(theta_rrm) * np.linalg.norm(true_theta)) + ) * 180 / np.pi + rlvi_loss_avg += np.arccos( + theta_rlvi @ true_theta / (np.linalg.norm(theta_rlvi) * np.linalg.norm(true_theta)) + ) * 180 / np.pi + + ml_loss.append(ml_loss_avg / MC_runs) + sever_loss.append(sever_loss_avg / MC_runs) + rrm_loss.append(rrm_loss_avg / MC_runs) + rlvi_loss.append(rlvi_loss_avg / MC_runs) + + plt.rcParams["mathtext.fontset"] = "cm" + # plt.plot(eps_values, ml_loss, lw=3, c='brown', label='ML') + plt.plot(eps_values, sever_loss, lw=3, c='orange', label='SEVER') + plt.plot(eps_values, rrm_loss, lw=3, c='green', label='RRM') + plt.plot(eps_values, rlvi_loss, lw=3, c='blue', label='RLVI') + plt.xlabel(r"corruption level $\varepsilon$", font_properties=custom_font) + plt.ylabel("LogReg\navg. 
angle in degree", font_properties=custom_font) + + plt.xticks(np.arange(0, 0.5, 0.1), fontsize=20) + plt.xlim(0, 0.4) + plt.yticks(fontsize=20) + plt.legend(fontsize=18) + plt.grid() + plt.tight_layout() + plt.savefig("plots/logistic_regression.png", dpi=300) + plt.close() + print("\tDone\n") + + +def test_logistic_regression_fixed_eps(): + print("Running test for logistic regression (fixed eps.)") + + true_theta = np.array([-1, 1, 1]) + ml_angles = [] + sever_angles = [] + rrm_angles = [] + rlvi_angles = [] + MC_runs = 100 + for _ in range(MC_runs): + X, y = generate_data_logistic_regression(size=200, eps=0.05) + theta_ml, _ = utils.sklearn_log_reg(X, y, weights=np.ones(len(y))) + # theta_ml, _ = utils.mm_log_reg(X, y, weights=np.ones(len(y))) + theta_sever = sever.logistic_regression(X, y, eps=0.3) + theta_rrm = rrm.logistic_regression(X, y, eps=0.3) + theta_rlvi = rlvi.logistic_regression(X, y) + ml_angles.append( + np.arccos(theta_ml @ true_theta / (np.linalg.norm(theta_ml) * np.linalg.norm(true_theta))) * 180 / np.pi + ) + sever_angles.append( + np.arccos(theta_sever @ true_theta / (np.linalg.norm(theta_sever) * np.linalg.norm(true_theta))) * 180 / np.pi + ) + rrm_angles.append( + np.arccos(theta_rrm @ true_theta / (np.linalg.norm(theta_rrm) * np.linalg.norm(true_theta))) * 180 / np.pi + ) + rlvi_angles.append( + np.arccos(theta_rlvi @ true_theta / (np.linalg.norm(theta_rlvi) * np.linalg.norm(true_theta))) * 180 / np.pi + ) + + errors_to_plot = { + "ML": ml_angles, + "SEVER": sever_angles, + "RRM": rrm_angles, + "RLVI": rlvi_angles + } + fig, ax = plt.subplots() + plt.rcParams["mathtext.fontset"] = "cm" + for i, errors in enumerate(errors_to_plot.values()): + plt.boxplot(errors, positions=[i + 1], **box_plot_props) + plt.xticks(range(1, len(errors_to_plot) + 1), errors_to_plot.keys(), font_properties=custom_font) + plt.ylabel("LogReg\nangle in degree", font_properties=custom_font) + plt.ylim(0, 30) + plt.yticks(np.arange(0, 35, 10), fontsize=20) + plt.text(0.95, 0.9, r"$\varepsilon=5\%$", + transform=ax.transAxes, fontsize=22, + ha='right', va='top', bbox=textbox_props) + plt.grid() + plt.tight_layout() + plt.savefig("plots/logistic_regression_boxplots.png", dpi=300) + plt.close() + print("\tDone\n") + + +def test_pca(): + print("Running test for PCA") + + true_theta = np.array([1 / np.sqrt(5), 2 / np.sqrt(5)]) + ml_errors = [] + rrm_errors = [] + sever_errors = [] + rlvi_errors = [] + MC_runs = 100 + + # Original benchmark in Osama et al. (2020) used the initial guess for RRM – so do we. 
+ # For a fair comparison we use it for all methods: SEVER, RRM, and RLVI + theta_init = np.array([0.1, 1]) + theta_init /= np.linalg.norm(theta_init) + for _ in range(MC_runs): + sample = generate_data_pca(size=40, eps=0.2) + theta_ml, _ = utils.pca(sample, weights=np.ones(len(sample))) + theta_rrm = rrm.pca(sample, eps=0.4, theta_init=theta_init) + theta_sever = sever.pca(sample, eps=0.4, theta_init=theta_init) + theta_rlvi = rlvi.pca(sample, theta_init=theta_init) + + ml_errors.append(1 - np.abs(theta_ml @ true_theta)) + rrm_errors.append(1 - np.abs(theta_rrm @ true_theta)) + sever_errors.append(1 - np.abs(theta_sever @ true_theta)) + rlvi_errors.append(1 - np.abs(theta_rlvi @ true_theta)) + + errors_to_plot = { + "ML": ml_errors, + "SEVER": sever_errors, + "RRM": rrm_errors, + "RLVI": rlvi_errors + } + + fig, ax = plt.subplots() + plt.rcParams["mathtext.fontset"] = "cm" + for i, errors in enumerate(errors_to_plot.values()): + plt.boxplot(errors, positions=[i + 1], **box_plot_props) + plt.xticks(range(1, len(errors_to_plot) + 1), errors_to_plot.keys(), font_properties=custom_font) + plt.ylabel("PCA\nmisalignment error", font_properties=custom_font) + plt.ylim(0, 0.061) + plt.yticks(np.arange(0, 0.061, 0.02), fontsize=20) + plt.text(0.95, 0.9, r"$\varepsilon=20\%$", + transform=ax.transAxes, fontsize=22, + ha='right', va='top', bbox=textbox_props) + plt.grid() + plt.tight_layout() + plt.savefig("plots/pca_boxplots.png", dpi=300) + plt.close() + print("\tDone\n") + + +def test_covariance(): + print("Running test for covariance estimation") + + true_cov = np.array([ + [1, 0.8], + [0.8, 1] + ]) + + ml_errors = [] + rrm_errors = [] + sever_errors = [] + rlvi_errors = [] + MC_runs = 100 + for _ in range(MC_runs): + samples = generate_data_cov(size=50, eps=0.2) + cov_ml = np.cov(samples, rowvar=False) + cov_rrm = rrm.covariance(samples, eps=0.4) + cov_sever = sever.covariance(samples, eps=0.4, numiter=3) + cov_rlvi = rlvi.covariance(samples, eps=0.4) # use constraint due to unbounded likelihood + ml_errors.append(np.linalg.norm(true_cov - cov_ml, ord='fro') / np.linalg.norm(true_cov, ord='fro')) + rrm_errors.append(np.linalg.norm(true_cov - cov_rrm, ord='fro') / np.linalg.norm(true_cov, ord='fro')) + sever_errors.append(np.linalg.norm(true_cov - cov_sever, ord='fro') / np.linalg.norm(true_cov, ord='fro')) + rlvi_errors.append(np.linalg.norm(true_cov - cov_rlvi, ord='fro') / np.linalg.norm(true_cov, ord='fro')) + + errors_to_plot = { + "ML": ml_errors, + "SEVER": sever_errors, + "RRM": rrm_errors, + "RLVI": rlvi_errors + } + + fig, ax = plt.subplots() + plt.rcParams["mathtext.fontset"] = "cm" + for i, errors in enumerate(errors_to_plot.values()): + plt.boxplot(errors, positions=[i + 1], **box_plot_props) + plt.xticks(range(1, len(errors_to_plot) + 1), errors_to_plot.keys(), font_properties=custom_font) + plt.ylabel("Covariance\nrelative error", font_properties=custom_font) + plt.ylim(0, 3) + plt.yticks(list(range(4)), fontsize=20) + plt.text(0.95, 0.9, r"$\varepsilon=20\%$", + transform=ax.transAxes, fontsize=22, + ha='right', va='top', bbox=textbox_props) + plt.grid() + plt.tight_layout() + plt.savefig("plots/cov_boxplots.png", dpi=300) + plt.close() + print("\tDone\n") + + +def main(): + test_mean() + test_linear_regression() + test_linear_regression_fixed_eps() + test_logistic_regression_fixed_eps() + test_pca() + test_covariance() + + +if __name__ == "__main__": + main() diff --git a/standard-learning/rlvi.py b/standard-learning/rlvi.py new file mode 100644 index 00000000..cf88ef10 --- 
/dev/null +++ b/standard-learning/rlvi.py @@ -0,0 +1,144 @@ +import numpy as np +from scipy import optimize as opt +from scipy.linalg import lstsq + +import utils + + +def update_weights(losses, tol=1e-3, maxiter=100): + '''Optimize Bernoulli probabilities''' + weights = 0.95 * np.ones_like(losses) + new_weights = np.copy(weights) + for _ in range(maxiter): + eps = 1 - np.mean(weights) + ratio = eps / (1 - eps) + new_weights = np.exp(-losses) / (ratio + np.exp(-losses)) + error = np.linalg.norm(new_weights - weights) + weights = np.copy(new_weights) + if error < tol: + break + return new_weights + + +def update_weights_constrained(losses, n_eff, tol=1e-3, maxiter=100): + """ + Constraint optimization of Bernoulli probabilities + for the unbounded likelihood case. + Uses the constraint: `sum(weights) >= n_eff`. + """ + + n = len(losses) + # First, solve the unconstrained optimization + new_weights = update_weights(losses, tol=tol, maxiter=maxiter) + # Then, correct based on the KKT condition + def shift_obj(s): + return np.square( + np.sum( + np.exp(-losses + s) / ((n - n_eff)/n_eff + np.exp(-losses + s)) + ) - n_eff + ) + if np.sum(new_weights) < n_eff: + shift = opt.minimize_scalar(shift_obj)['x'] + new_weights = np.exp(-losses + shift) / ((n - n_eff)/n_eff + np.exp(-losses + shift)) + return new_weights + + +def mean(sample, maxiter=100, tol=1e-3): + weights = np.ones(sample.shape[0]) + theta = weights @ sample / np.sum(weights) + residuals = np.linalg.norm(theta - sample, axis=1)**2 + sigma2 = weights @ residuals / np.sum(weights) + losses = 0.5 * residuals/sigma2 + + for _ in range(maxiter): + weights = update_weights(losses) + prev_theta = np.copy(theta) + theta = weights @ sample / np.sum(weights) + residuals = np.linalg.norm(theta - sample, axis=1)**2 + sigma2 = weights @ residuals / np.sum(weights) + losses = 0.5 * residuals/sigma2 + + discrepancy = np.linalg.norm(theta - prev_theta) / np.linalg.norm(prev_theta) + if discrepancy <= tol: + break + + return theta + + +def linear_regression(X, y, maxiter=100, tol=1e-3): + weights = np.ones(X.shape[0]) + w_sqrt = np.diag(np.sqrt(weights)) + theta = lstsq(w_sqrt @ X, w_sqrt @ y)[0] + residuals = (y - X @ theta)**2 + sigma2 = weights @ residuals / np.sum(weights) + losses = 0.5 * residuals/sigma2 + + for _ in range(maxiter): + weights = update_weights(losses) + prev_theta = np.copy(theta) + w_sqrt = np.diag(np.sqrt(weights)) + theta = lstsq(w_sqrt @ X, w_sqrt @ y)[0] + residuals = (y - X @ theta)**2 + sigma2 = weights @ residuals / np.sum(weights) + losses = 0.5 * residuals/sigma2 + + discrepancy = np.linalg.norm(theta - prev_theta) / np.linalg.norm(prev_theta) + if discrepancy <= tol: + break + + return theta + + +def logistic_regression(X, y, maxiter=100, tol=1e-2): + weights = np.ones(X.shape[0]) + theta, losses = utils.sklearn_log_reg(X, y, weights) + # theta, losses = utils.mm_log_reg(X, y, weights) + + for _ in range(maxiter): + weights = update_weights(losses) + + prev_theta = np.copy(theta) + theta, losses = utils.sklearn_log_reg(X, y, weights) + # theta, losses = utils.mm_log_reg(X, y, weights) + + discrepancy = np.linalg.norm(theta - prev_theta) / np.linalg.norm(prev_theta) + if discrepancy <= tol: + break + + return theta + + +def pca(sample, maxiter=100, tol=1e-2, theta_init=None): + weights = np.ones(sample.shape[0]) + theta, losses = utils.pca(sample, weights, theta_init) + + for _ in range(maxiter): + weights = update_weights(losses) + + prev_theta = np.copy(theta) + theta, losses = utils.pca(sample, weights) + + 
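+        # Relative change of the estimated principal direction; stop once it
+        # falls below `tol` (the same convergence criterion as in the solvers above).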
discrepancy = np.linalg.norm(theta - prev_theta) / np.linalg.norm(prev_theta) + if discrepancy <= tol: + break + + return theta + + +def covariance(sample, eps, maxiter=100, tol=1e-2): + n, d = sample.shape + n_eff = n * (1 - eps) + + weights = np.ones(n) + theta, losses = utils.covariance(sample, weights) + + for _ in range(maxiter): + weights = update_weights_constrained(losses, n_eff) + prev_theta = np.copy(theta) + theta, losses = utils.covariance(sample, weights) + + discrepancy = np.linalg.norm(theta - prev_theta, ord='fro') / np.linalg.norm(prev_theta, ord='fro') + if discrepancy <= tol: + break + + return theta diff --git a/standard-learning/rrm.py b/standard-learning/rrm.py new file mode 100644 index 00000000..a7684b1b --- /dev/null +++ b/standard-learning/rrm.py @@ -0,0 +1,124 @@ +""" +Based on paper Robust Risk Minimization for Statistical Learning +Osama et al., 2020 +""" +import numpy as np +from scipy import optimize as opt +from scipy.linalg import lstsq + +import utils + + +def update_weights(losses, eps): + '''Optimize sample weights for Robust Risk Minimization''' + res = np.copy(losses) + t = -np.log((1 - eps) * res.shape[0]) + numeric_cutoff = 1e-16 + def objective(xi): + phi = np.exp(-res * np.exp(-xi)) + phi[phi < numeric_cutoff] = numeric_cutoff + sum_phi = np.sum(phi) + return np.exp(xi) * (np.log(sum_phi) + t) + + opt_res = opt.minimize_scalar(objective) + opt_xi = opt_res['x'] + opt_alpha = np.exp(opt_xi) + + phi = np.exp(-res / opt_alpha) + phi[phi < numeric_cutoff] = numeric_cutoff + sum_phi = np.sum(phi) + opt_beta_over_alpha = np.log(sum_phi) - 1 + + opt_weights = np.exp(-res / opt_alpha) * np.exp(-opt_beta_over_alpha - 1) + return opt_weights + + +def mean(sample, eps, maxiter=100, tol=1e-3): + weights = np.ones(sample.shape[0]) / sample.shape[0] + theta = weights @ sample / np.sum(weights) + losses = np.linalg.norm(theta - sample, axis=1)**2 + + for _ in range(maxiter): + weights = update_weights(losses, eps) + + prev_theta = np.copy(theta) + theta = weights @ sample / np.sum(weights) + losses = np.linalg.norm(theta - sample, axis=1)**2 + + discrepancy = np.linalg.norm(theta - prev_theta) / np.linalg.norm(prev_theta) + if discrepancy <= tol: + break + + return theta + + +def linear_regression(X, y, eps, maxiter=100, tol=1e-3): + weights = np.ones(X.shape[0]) / X.shape[0] + w_sqrt = np.diag(np.sqrt(weights)) + theta = lstsq(w_sqrt @ X, w_sqrt @ y)[0] + losses = (y - X @ theta)**2 + + for _ in range(maxiter): + weights = update_weights(losses, eps) + + prev_theta = np.copy(theta) + w_sqrt = np.diag(np.sqrt(weights)) + theta = lstsq(w_sqrt @ X, w_sqrt @ y)[0] + losses = (y - X @ theta)**2 + + discrepancy = np.linalg.norm(theta - prev_theta) / np.linalg.norm(prev_theta) + if discrepancy <= tol: + break + + return theta + + +def logistic_regression(X, y, eps, maxiter=100, tol=1e-2): + weights = np.ones(X.shape[0]) / X.shape[0] + theta, losses = utils.sklearn_log_reg(X, y, weights) + # theta, losses = utils.mm_log_reg(X, y, weights) + for _ in range(maxiter): + weights = update_weights(losses, eps) + + prev_theta = np.copy(theta) + theta, losses = utils.sklearn_log_reg(X, y, weights) + # theta, losses = utils.mm_log_reg(X, y, weights) + + discrepancy = np.linalg.norm(theta - prev_theta) / np.linalg.norm(prev_theta) + if discrepancy <= tol: + break + + return theta + + +def pca(sample, eps, maxiter=100, tol=1e-2, theta_init=None): + weights = np.ones(sample.shape[0]) / sample.shape[0] + theta, losses = utils.pca(sample, weights, theta_init) + for _ in 
range(maxiter):
+        weights = update_weights(losses, eps)
+
+        prev_theta = np.copy(theta)
+        theta, losses = utils.pca(sample, weights)
+
+        discrepancy = np.linalg.norm(theta - prev_theta) / np.linalg.norm(prev_theta)
+        if discrepancy <= tol:
+            break
+
+    return theta
+
+
+def covariance(sample, eps, maxiter=100, tol=1e-2):
+    weights = np.ones(sample.shape[0]) / sample.shape[0]
+    theta, losses = utils.covariance(sample, weights)
+
+    for _ in range(maxiter):
+        weights = update_weights(losses, eps)
+
+        prev_theta = np.copy(theta)
+        theta, losses = utils.covariance(sample, weights)
+
+        discrepancy = np.linalg.norm(theta - prev_theta, ord='fro') / np.linalg.norm(prev_theta, ord='fro')
+        if discrepancy <= tol:
+            break
+
+    return theta
diff --git a/standard-learning/sever.py b/standard-learning/sever.py
new file mode 100644
index 00000000..df5b12c8
--- /dev/null
+++ b/standard-learning/sever.py
@@ -0,0 +1,156 @@
+"""
+Based on the paper "SEVER: A Robust Meta-Algorithm for Stochastic Optimization",
+Diakonikolas et al., 2019.
+"""
+import numpy as np
+from scipy.linalg import lstsq
+import utils
+
+
+def linear_regression(X, y, eps, numiter=4):
+    n = len(y)
+    s = range(n)  # active set
+    # X and y corresponding to active set
+    Xs = X[s, :]
+    ys = y[s]
+
+    for _ in range(numiter):
+        # base learner: least squares
+        theta = lstsq(Xs, ys)[0]
+
+        # gradients of the squared loss, centered
+        G_uncen = 2 * (Xs @ theta - ys)[:, np.newaxis] * Xs
+        G_cen = G_uncen - np.mean(G_uncen, axis=0)
+
+        # top right singular vector (first row of Vh returned by numpy's SVD)
+        Vh = np.linalg.svd(G_cen)[-1]
+        v = Vh[0]
+
+        # outlier scores
+        tau = (G_cen @ v)**2
+
+        # remove p points with highest scores
+        p = int(eps / 2 * Xs.shape[0])
+        idx = np.argsort(-tau)
+
+        # new active set
+        s = idx[p:]
+        Xs = Xs[s, :]
+        ys = ys[s]
+
+    return theta
+
+
+def logistic_regression(X, y, eps, numiter=4):
+    n = len(y)
+    s = range(n)  # active set
+    X = np.hstack([np.ones((X.shape[0], 1)), X])
+    # X and y corresponding to active set
+    Xs = X[s, :]
+    ys = y[s]
+
+    for _ in range(numiter):
+        # base learner: logistic regression
+        theta, _ = utils.sklearn_log_reg(Xs[:, 1:], ys, weights=np.ones(len(ys)))
+        # theta, _ = utils.mm_log_reg(Xs[:, 1:], ys, weights=np.ones(len(ys)))
+
+        # gradients of the logistic loss, (sigmoid(x' theta) - y) * x, centered
+        phi = Xs @ theta
+        proba = utils.sigmoid(phi)
+        G_uncen = (proba - ys)[:, np.newaxis] * Xs
+        G_cen = G_uncen - np.mean(G_uncen, axis=0)
+
+        # top right singular vector
+        Vh = np.linalg.svd(G_cen)[-1]
+        v = Vh[0]
+
+        # outlier scores
+        tau = (G_cen @ v)**2
+
+        # remove p points with highest scores
+        p = int(eps / 2 * Xs.shape[0])
+        idx = np.argsort(-tau)
+
+        # new active set
+        s = idx[p:]
+        Xs = Xs[s, :]
+        ys = ys[s]
+
+    return theta
+
+
+def pca(samples, eps, numiter=4, theta_init=None):
+    n = len(samples)
+    s = range(n)  # active set
+    # samples corresponding to active set
+    samples_s = samples[s, :]
+
+    for k in range(numiter):
+        # base learner: PCA (use the provided initial guess on the first round)
+        if (theta_init is not None) and (k == 0):
+            theta = theta_init
+        else:
+            theta, _ = utils.pca(samples_s, weights=np.ones(len(samples_s)))
+
+        # gradients of the reconstruction loss, centered
+        G_uncen = -2 * ((samples_s @ theta)[:, np.newaxis] * samples_s)
+        G_cen = G_uncen - np.mean(G_uncen, axis=0)
+
+        # top right singular vector
+        Vh = np.linalg.svd(G_cen)[-1]
+        v = Vh[0]
+
+        # outlier scores
+        tau = (G_cen @ v)**2
+
+        # remove p points with highest scores
+        p = int(eps / 2 * samples_s.shape[0])
+        idx = np.argsort(-tau)
+
+        # new active set
+        s = idx[p:]
+        samples_s = samples_s[s, :]
+
+    return theta
+
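+
+# Usage sketch (illustrative, not part of the original benchmark): each estimator
+# in this module takes an assumed corruption bound `eps` and discards roughly
+# eps/2 of the current active set in each of the `numiter` filtering rounds, e.g.
+#
+#     rng = np.random.default_rng(0)
+#     X = rng.normal(size=(200, 3))
+#     y = X @ np.array([1.0, -1.0, 0.5]) + 0.1 * rng.normal(size=200)
+#     y[:20] += 10.0                     # corrupt 10% of the responses
+#     theta_hat = linear_regression(X, y, eps=0.1)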
+
+
+def covariance(samples, eps, numiter=4):
+    n, d = samples.shape
+    s = range(n)  # active set
+    # samples corresponding to active set
+    samples_s = samples[s, :]
+
+    for _ in range(numiter):
+        # base learner: sample covariance
+        mean = np.mean(samples_s, axis=0)
+        cov = np.cov(samples_s, rowvar=False)
+        cov_inv = np.linalg.inv(cov)
+
+        # gradients of the Gaussian negative log-likelihood w.r.t. (mean, cov)
+        G_uncen = np.zeros((samples_s.shape[0], d + d**2))
+        for row in range(len(samples_s)):
+            G_uncen[row, :d] = cov_inv @ (mean - samples_s[row, :])
+            centered = (samples_s[row, :] - mean).reshape((-1, 1))
+            row_cov = centered @ centered.T
+            g = cov_inv @ (np.eye(d) - row_cov @ cov_inv) / 2.0
+            g = g.reshape(-1)
+            G_uncen[row, d:] = g
+
+        G_cen = G_uncen - np.mean(G_uncen, axis=0)
+
+        # top right singular vector
+        Vh = np.linalg.svd(G_cen)[-1]
+        v = Vh[0]
+
+        # outlier scores
+        tau = (G_cen @ v)**2
+
+        # remove p points with highest scores
+        p = int(eps / 2 * samples_s.shape[0])
+        idx = np.argsort(-tau)
+
+        # new active set
+        s = idx[p:]
+        samples_s = samples_s[s, :]
+
+    return cov
\ No newline at end of file
diff --git a/standard-learning/utils.py b/standard-learning/utils.py
new file mode 100644
index 00000000..9d329ad8
--- /dev/null
+++ b/standard-learning/utils.py
@@ -0,0 +1,108 @@
+import numpy as np
+from scipy.linalg import lstsq
+from sklearn.linear_model import LogisticRegression
+from sklearn.decomposition import PCA
+
+
+def sigmoid(x):
+    '''Numerically stable logistic function'''
+    pos_mask = (x >= 0)
+    neg_mask = (x < 0)
+    z = np.zeros_like(x)
+    z[pos_mask] = np.exp(-x[pos_mask])
+    z[neg_mask] = np.exp(x[neg_mask])
+    top = np.ones_like(x)
+    top[neg_mask] = z[neg_mask]
+    return top / (1 + z)
+
+
+def cross_entropy(X, theta, y):
+    '''Per-sample logistic loss -y*phi + log(1 + exp(phi)), evaluated stably'''
+    phi = X @ theta
+    return np.logaddexp(0, phi) - y * phi
+
+
+def clf_predict(X, theta, augment=True):
+    if augment:
+        X = np.hstack([np.ones((X.shape[0], 1)), X])
+    phi = X @ theta
+    proba = sigmoid(phi)
+    return np.array(proba > 0.5, dtype=int)
+
+
+def mm_log_reg(X, y, weights):
+    '''Weighted logistic regression via majorization-minimization'''
+    X = np.hstack([np.ones((X.shape[0], 1)), X])
+    theta0 = np.zeros(X.shape[1])
+
+    X_tilde = 0.5 * X * np.sqrt(weights)[:, None]
+    Q = X_tilde.T @ X_tilde
+    Q_inv = np.linalg.inv(Q)
+
+    def grad(theta):
+        return X.T @ (weights * (sigmoid(X @ theta) - y))
+
+    def delta(theta):
+        g = grad(theta)
+        return -Q_inv @ g
+
+    theta1 = theta0 + delta(theta0)
+
+    while True:
+        if np.linalg.norm(theta1 - theta0) > 1e-2:
+            theta0 = theta1
+            theta1 = theta0 + delta(theta0)
+        else:
+            theta_best = theta1
+            break
+
+    losses = cross_entropy(X, theta_best, y)
+    return theta_best, losses
+
+
+def sklearn_log_reg(X, y, weights, reg_coeff=1e2):
+    '''Weighted logistic regression via scikit-learn'''
+    def get_losses(classifier):
+        log_proba = classifier.predict_log_proba(X)
+        # cross-entropy: -y*log p(y=1|x) - (1-y)*log p(y=0|x)
+        return -y * log_proba[:, 1] - (1 - y) * log_proba[:, 0]
+
+    weights = weights / np.max(weights)  # normalize without modifying the caller's array
+    clf = LogisticRegression(solver="liblinear", C=reg_coeff)
+    clf.fit(X, y, sample_weight=weights)
+    theta = np.hstack([clf.intercept_.flatten(), clf.coef_.flatten()])
+
+    # Compute loss on samples
+    losses = get_losses(clf)
+    return theta, losses
+
+
+def pca(samples, weights, theta=None):
+    def get_losses(princ_comp):
+        proj = samples @ princ_comp
+        return np.sum(samples**2, axis=1) - proj**2
+
+    if theta is None:
+        pca_ = PCA(n_components=1)
+        pca_.fit(np.diag(weights) @ samples)
+        theta = pca_.components_[0]
+        theta /= np.linalg.norm(theta)
+
+    # Compute loss on samples
+    losses = get_losses(theta)
+    return theta, losses
+
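+
+# Example (illustrative only): the model-fitting helpers in this module share the
+# same contract, returning the fitted parameter together with per-sample losses,
+# which the robust solvers then re-weight, e.g.
+#
+#     rng = np.random.default_rng(0)
+#     X = rng.normal(size=(100, 2))
+#     y = (rng.uniform(size=100) < sigmoid(X @ np.array([1.0, -1.0]))).astype(int)
+#     theta, losses = sklearn_log_reg(X, y, weights=np.ones(100))
+#     assert losses.shape == (100,)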
+
+
+def covariance(samples, weights, mean=None):
+    def get_losses(mean, cov, weights):
+        d = mean.shape[0]
+        centered_samples = samples - mean
+        scaled_samples = lstsq(cov, centered_samples.T)[0]
+        residuals = np.sum(centered_samples * scaled_samples.T, axis=1)
+        (sign, logabsdet) = np.linalg.slogdet(cov)
+        if sign <= 0:
+            raise ValueError("Singular covariance matrix")
+        return 0.5 * (residuals + logabsdet + d * np.log(2 * np.pi))
+
+    mean = samples.T @ weights / np.sum(weights)
+    centered_samples = samples - mean
+    cov = centered_samples.T @ np.diag(weights) @ centered_samples / np.sum(weights)
+    # Compute loss on samples
+    losses = get_losses(mean, cov, weights)
+    return cov, losses
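+
+
+if __name__ == "__main__":
+    # Minimal smoke test (an illustrative sketch, not part of the original
+    # benchmark): with equal weights, the weighted covariance estimate should be
+    # close to the true covariance used in test_covariance() of main.py.
+    rng = np.random.default_rng(0)
+    true_cov = np.array([[1.0, 0.8], [0.8, 1.0]])
+    samples = rng.multivariate_normal(np.zeros(2), true_cov, size=1000)
+    cov_hat, losses = covariance(samples, np.ones(len(samples)))
+    print("estimated covariance:\n", np.round(cov_hat, 2))
+    print("average negative log-likelihood:", round(float(np.mean(losses)), 3))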