diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100644
index 0000000..3de4ad5
--- /dev/null
+++ b/.vscode/launch.json
@@ -0,0 +1,27 @@
+{
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "Baseline",
+            "type": "python",
+            "request": "launch",
+            "program": "${workspaceFolder}/main.py",
+            "args": [
+                "--scenario=multi-task-nc",
+                "--sub_dir=baseline-debug"
+            ],
+            "console": "integratedTerminal"
+        },
+        {
+            "name": "EWC",
+            "type": "python",
+            "request": "launch",
+            "program": "${workspaceFolder}/main_ewc.py",
+            "args": [
+                "--scenario=multi-task-nc",
+                "--sub_dir=ewc-debug"
+            ],
+            "console": "integratedTerminal"
+        }
+    ]
+}
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..d64442e
--- /dev/null
+++ b/README.md
@@ -0,0 +1,15 @@
+# Setup
+
+```bash
+sh fetch_data_and_setup.sh
+conda env create -f environment.yml
+conda activate clvision-challenge
+sh create_submission.sh
+```
+
+# Run
+
+```bash
+python main.py --scenario="multi-task-nc" --epochs="5" --sub_dir="baseline"
+python main_ewc.py --scenario="multi-task-nc" --epochs="5" --sub_dir="ewc"
+```
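The module added next, `ewc/elastic_weight_consolidation.py`, implements the EWC regularizer: a quadratic penalty that keeps parameters close to the values stored after the previous task, weighted by a diagonal Fisher estimate. A minimal, self-contained sketch of that penalty (toy tensors only, not project code):

```python
import torch

# Illustrative values only: theta is the current parameter vector, theta_star
# the copy stored after the previous task, fisher the diagonal Fisher estimate.
theta = torch.tensor([0.9, -0.2, 1.5], requires_grad=True)
theta_star = torch.tensor([1.0, 0.0, 1.0])
fisher = torch.tensor([2.0, 0.1, 0.5])
weight = 100.0  # plays the same role as the class' `weight` argument

# EWC penalty: (weight / 2) * sum_i F_i * (theta_i - theta_star_i)^2
penalty = (weight / 2) * (fisher * (theta - theta_star) ** 2).sum()
penalty.backward()
print(penalty.item(), theta.grad)
```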
diff --git a/ewc/elastic_weight_consolidation.py b/ewc/elastic_weight_consolidation.py
new file mode 100644
index 0000000..82293ae
--- /dev/null
+++ b/ewc/elastic_weight_consolidation.py
@@ -0,0 +1,64 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.optim as optim
+from torch import autograd
+import numpy as np
+from torch.utils.data import DataLoader
+
+
+class ElasticWeightConsolidation:
+
+    def __init__(self, model, crit, lr=0.001, weight=1000000):
+        self.model = model
+        self.weight = weight
+        self.crit = crit
+        self.optimizer = optim.Adam(self.model.parameters(), lr)
+
+    def _update_mean_params(self):
+        for param_name, param in self.model.named_parameters():
+            _buff_param_name = param_name.replace('.', '__')
+            self.model.register_buffer(_buff_param_name + '_estimated_mean', param.data.clone())
+
+    def _update_fisher_params(self, current_ds, batch_size, num_batch):
+        dl = DataLoader(current_ds, batch_size, shuffle=True)
+        log_likelihoods = []
+        for i, (input, target) in enumerate(dl):
+            if i > num_batch:
+                break
+            output = F.log_softmax(self.model(input), dim=1)
+            log_likelihoods.append(output.gather(1, target.unsqueeze(1)).squeeze(1))
+        log_likelihood = torch.cat(log_likelihoods).mean()
+        grad_log_likelihood = autograd.grad(log_likelihood, self.model.parameters())
+        _buff_param_names = [param[0].replace('.', '__') for param in self.model.named_parameters()]
+        for _buff_param_name, param in zip(_buff_param_names, grad_log_likelihood):
+            self.model.register_buffer(_buff_param_name + '_estimated_fisher', param.data.clone() ** 2)
+
+    def register_ewc_params(self, dataset, batch_size, num_batches):
+        self._update_fisher_params(dataset, batch_size, num_batches)
+        self._update_mean_params()
+
+    def _compute_consolidation_loss(self, weight):
+        try:
+            losses = []
+            for param_name, param in self.model.named_parameters():
+                _buff_param_name = param_name.replace('.', '__')
+                estimated_mean = getattr(self.model, '{}_estimated_mean'.format(_buff_param_name))
+                estimated_fisher = getattr(self.model, '{}_estimated_fisher'.format(_buff_param_name))
+                losses.append((estimated_fisher * (param - estimated_mean) ** 2).sum())
+            return (weight / 2) * sum(losses)
+        except AttributeError:
+            return 0
+
+    def forward_backward_update(self, input, target):
+        output = self.model(input)
+        loss = self._compute_consolidation_loss(self.weight) + self.crit(output, target)
+        self.optimizer.zero_grad()
+        loss.backward()
+        self.optimizer.step()
+
+    def save(self, filename):
+        torch.save(self.model, filename)
+
+    def load(self, filename):
+        self.model = torch.load(filename)
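How the class above is meant to be driven, as a toy sketch (the linear model, random tensors, and hyperparameters here are placeholders; in this repository the wrapper is built around a ResNet-18 and the preprocessed CORe50 batches):

```python
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset

from ewc.elastic_weight_consolidation import ElasticWeightConsolidation

model = nn.Linear(10, 5)  # placeholder for the ResNet-18 classifier
ewc = ElasticWeightConsolidation(model, crit=nn.CrossEntropyLoss(), lr=1e-3, weight=100)

# Task A: train with the wrapper (it uses its internal Adam optimizer),
# then store the parameter means and the Fisher estimate.
xa, ya = torch.randn(64, 10), torch.randint(0, 5, (64,))
for _ in range(5):
    ewc.forward_backward_update(xa, ya)
ewc.register_ewc_params(TensorDataset(xa, ya), batch_size=16, num_batches=4)

# Task B: the consolidation term now pulls parameters back toward task A.
xb, yb = torch.randn(64, 10), torch.randint(0, 5, (64,))
for _ in range(5):
    ewc.forward_backward_update(xb, yb)
```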
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..a55de22
--- /dev/null
+++ b/main.py
@@ -0,0 +1,185 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+################################################################################
+# Copyright (c) 2019. Vincenzo Lomonaco, Massimo Caccia, Pau Rodriguez,        #
+# Lorenzo Pellegrini. All rights reserved.                                     #
+# Copyrights licensed under the CC BY 4.0 License.                             #
+# See the accompanying LICENSE file for terms.                                 #
+#                                                                              #
+# Date: 1-02-2019                                                              #
+# Author: Vincenzo Lomonaco                                                    #
+# E-mail: vincenzo.lomonaco@unibo.it                                           #
+# Website: vincenzolomonaco.com                                                #
+################################################################################
+
+"""
+
+Getting Started example for the CVPR 2020 CLVision Challenge. It will load the
+data and create the submission file for you in the
+cvpr_clvision_challenge/submissions directory.
+
+"""
+
+# Python 2-3 compatible
+from __future__ import print_function
+from __future__ import division
+from __future__ import absolute_import
+
+import argparse
+import os
+import time
+import copy
+from core50.dataset import CORE50
+import torch
+import numpy as np
+from utils.train_test import train_net, test_multitask, preprocess_imgs
+import torchvision.models as models
+from utils.common import create_code_snapshot
+
+
+def main(args):
+
+    # print args recap
+    print(args, end="\n\n")
+
+    # do not remove this line
+    start = time.time()
+
+    # Create the dataset object for the chosen scenario ("ni", "multi-task-nc"
+    # or "nic"), assuming the CORe50 data is located in ./core50/data/
+    dataset = CORE50(root='core50/data/', scenario=args.scenario,
+                     preload=args.preload_data)
+
+    # Get the validation set
+    print("Recovering validation set...")
+    full_validset = dataset.get_full_valid_set()
+
+    # model
+    if args.classifier == 'ResNet18':
+        classifier = models.resnet18(pretrained=True)
+        classifier.fc = torch.nn.Linear(512, args.n_classes)
+
+    opt = torch.optim.SGD(classifier.parameters(), lr=args.lr)
+    criterion = torch.nn.CrossEntropyLoss()
+
+    # vars to update over time
+    valid_acc = []
+    ext_mem_sz = []
+    ram_usage = []
+    heads = []
+    ext_mem = None
+
+    # loop over the training incremental batches (x, y, t)
+    for i, train_batch in enumerate(dataset):
+        train_x, train_y, t = train_batch
+
+        # add replay patterns sampled from the current batch to external memory
+        idxs_cur = np.random.choice(
+            train_x.shape[0], args.replay_examples, replace=False
+        )
+
+        if i == 0:
+            ext_mem = [train_x[idxs_cur], train_y[idxs_cur]]
+        else:
+            ext_mem = [
+                np.concatenate((train_x[idxs_cur], ext_mem[0])),
+                np.concatenate((train_y[idxs_cur], ext_mem[1]))]
+
+        train_x = np.concatenate((train_x, ext_mem[0]))
+        train_y = np.concatenate((train_y, ext_mem[1]))
+
+        print("----------- batch {0} -------------".format(i))
+        print("x shape: {0}, y shape: {1}"
+              .format(train_x.shape, train_y.shape))
+        print("Task Label: ", t)
+
+        # train the classifier on the current batch/task
+        _, _, stats = train_net(
+            opt, classifier, criterion, args.batch_size, train_x, train_y, t,
+            args.epochs, preproc=preprocess_imgs
+        )
+        if args.scenario == "multi-task-nc":
+            heads.append(copy.deepcopy(classifier.fc))
+
+        # collect statistics
+        ext_mem_sz += stats['disk']
+        ram_usage += stats['ram']
+
+        # test on the validation set
+        stats, _ = test_multitask(
+            classifier, full_validset, args.batch_size,
+            preproc=preprocess_imgs, multi_heads=heads, verbose=False
+        )
+
+        valid_acc += stats['acc']
+        print("------------------------------------------")
+        print("Avg. acc: {}".format(stats['acc']))
+        print("------------------------------------------")
+
+    # Generate submission.zip
+    # directory with the code snapshot to generate the results
+    sub_dir = 'submissions/' + args.sub_dir
+    if not os.path.exists(sub_dir):
+        os.makedirs(sub_dir)
+
+    # copy code
+    create_code_snapshot(".", sub_dir + "/code_snapshot")
+
+    # generate metadata.txt with all the data used for the CLScore
+    elapsed = (time.time() - start) / 60
+    print("Training Time: {}m".format(elapsed))
+    with open(sub_dir + "/metadata.txt", "w") as wf:
+        for obj in [
+            np.average(valid_acc), elapsed, np.average(ram_usage),
+            np.max(ram_usage), np.average(ext_mem_sz), np.max(ext_mem_sz)
+        ]:
+            wf.write(str(obj) + "\n")
+    print(f'Average Accuracy Over Time on the Validation Set: {np.average(valid_acc)}')
+    print(f'Total Training/Test time: {elapsed} Minutes')
+    print(f'Average RAM Usage: {np.average(ram_usage)} MB')
+    print(f'Max RAM Usage: {np.max(ram_usage)} MB')
+
+    print("Experiment completed.")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser('CVPR Continual Learning Challenge')
+
+    # General
+    parser.add_argument('--scenario', type=str, default="multi-task-nc",
+                        choices=['ni', 'multi-task-nc', 'nic'])
+    parser.add_argument('--preload_data', type=bool, default=True,
+                        help='preload data into RAM')
+    parser.add_argument('--no_preload_data', dest='preload_data',
+                        action='store_false')
+
+    # Model
+    parser.add_argument('-cls', '--classifier', type=str, default='ResNet18',
+                        choices=['ResNet18'])
+
+    # Optimization
+    parser.add_argument('--lr', type=float, default=0.01,
+                        help='learning rate')
+    parser.add_argument('--batch_size', type=int, default=32,
+                        help='batch_size')
+    parser.add_argument('--epochs', type=int, default=1,
+                        help='number of epochs')
+
+    # Continual Learning
+    parser.add_argument('--replay_examples', type=int, default=0,
+                        help='data examples to keep in memory for each batch '
+                             'for replay.')
+
+    # Misc
+    parser.add_argument('--sub_dir', type=str, default="multi-task-nc",
+                        help='directory of the submission file for this exp.')
+
+    args = parser.parse_args()
+    args.n_classes = 50
+    args.input_size = [3, 128, 128]
+
+    args.cuda = torch.cuda.is_available()
+    args.device = 'cuda:0' if args.cuda else 'cpu'
+
+    main(args)
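The only continual-learning mechanism in `main.py` is the optional replay memory; the `ext_mem` bookkeeping in the loop above is easiest to see on toy arrays (the shapes below are made up, purely for illustration):

```python
import numpy as np

rng = np.random.default_rng(0)
replay_examples = 2  # corresponds to --replay_examples
ext_mem = None

# three fake incremental batches of 5 patterns with 4 features each
for i in range(3):
    train_x = rng.normal(size=(5, 4))
    train_y = rng.integers(0, 50, size=5)

    # keep a few patterns from the current batch in the external memory ...
    idxs_cur = rng.choice(train_x.shape[0], replay_examples, replace=False)
    if i == 0:
        ext_mem = [train_x[idxs_cur], train_y[idxs_cur]]
    else:
        ext_mem = [np.concatenate((train_x[idxs_cur], ext_mem[0])),
                   np.concatenate((train_y[idxs_cur], ext_mem[1]))]

    # ... and train on the current batch plus everything stored so far
    train_x = np.concatenate((train_x, ext_mem[0]))
    train_y = np.concatenate((train_y, ext_mem[1]))
    print(i, train_x.shape, ext_mem[0].shape)  # memory grows by 2 per batch
```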
diff --git a/main_ewc.py b/main_ewc.py
new file mode 100644
index 0000000..70e0080
--- /dev/null
+++ b/main_ewc.py
@@ -0,0 +1,189 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+################################################################################
+# Copyright (c) 2019. Vincenzo Lomonaco, Massimo Caccia, Pau Rodriguez,        #
+# Lorenzo Pellegrini. All rights reserved.                                     #
+# Copyrights licensed under the CC BY 4.0 License.                             #
+# See the accompanying LICENSE file for terms.                                 #
+#                                                                              #
+# Date: 1-02-2019                                                              #
+# Author: Vincenzo Lomonaco                                                    #
+# E-mail: vincenzo.lomonaco@unibo.it                                           #
+# Website: vincenzolomonaco.com                                                #
################################################################################
+
+"""
+
+EWC variant of the getting-started baseline for the CVPR 2020 CLVision
+Challenge. It will load the data and create the submission file for you in
+the cvpr_clvision_challenge/submissions directory.
+
+"""
+
+# Python 2-3 compatible
+from __future__ import print_function
+from __future__ import division
+from __future__ import absolute_import
+
+import argparse
+import os
+import time
+import copy
+from core50.dataset import CORE50
+import torch
+import numpy as np
+from utils.train_test_ewc import train_net_ewc, test_multitask, preprocess_imgs
+import torchvision.models as models
+from utils.common import create_code_snapshot
+
+from ewc.elastic_weight_consolidation import ElasticWeightConsolidation
+
+def main(args):
+
+    # print args recap
+    print(args, end="\n\n")
+
+    # do not remove this line
+    start = time.time()
+
+    # Create the dataset object for the chosen scenario ("ni", "multi-task-nc"
+    # or "nic"), assuming the CORe50 data is located in ./core50/data/
+    dataset = CORE50(root='core50/data/', scenario=args.scenario,
+                     preload=args.preload_data)
+
+    # Get the validation set
+    print("Recovering validation set...")
+    full_validset = dataset.get_full_valid_set()
+
+    # model
+    if args.classifier == 'ResNet18Ewc':
+        classifier = models.resnet18(pretrained=True)
+        classifier.fc = torch.nn.Linear(512, args.n_classes)
+
+    opt = torch.optim.SGD(classifier.parameters(), lr=args.lr)
+    criterion = torch.nn.CrossEntropyLoss()
+    ewc = ElasticWeightConsolidation(classifier, crit=criterion, lr=1e-4, weight=100)
+
+    # vars to update over time
+    valid_acc = []
+    ext_mem_sz = []
+    ram_usage = []
+    heads = []
+    ext_mem = None
+
+    # loop over the training incremental batches (x, y, t)
+    for i, train_batch in enumerate(dataset):
+        train_x, train_y, t = train_batch
+
+        # add replay patterns sampled from the current batch to external memory
+        idxs_cur = np.random.choice(
+            train_x.shape[0], args.replay_examples, replace=False
+        )
+
+        if i == 0:
+            ext_mem = [train_x[idxs_cur], train_y[idxs_cur]]
+        else:
+            ext_mem = [
+                np.concatenate((train_x[idxs_cur], ext_mem[0])),
+                np.concatenate((train_y[idxs_cur], ext_mem[1]))]
+
+        train_x = np.concatenate((train_x, ext_mem[0]))
+        train_y = np.concatenate((train_y, ext_mem[1]))
+
+        print("----------- batch {0} -------------".format(i))
+        print("x shape: {0}, y shape: {1}"
+              .format(train_x.shape, train_y.shape))
+        print("Task Label: ", t)
+
+        # train the classifier on the current batch/task
+        _, _, stats, preprocessed_dataset = train_net_ewc(
+            opt, ewc, criterion, args.batch_size, train_x, train_y, t,
+            args.epochs, preproc=preprocess_imgs
+        )
+        ewc.register_ewc_params(preprocessed_dataset, args.batch_size, dataset.nbatch[dataset.scenario])
+
+        if args.scenario == "multi-task-nc":
+            heads.append(copy.deepcopy(classifier.fc))
+
+        # collect statistics
+        ext_mem_sz += stats['disk']
+        ram_usage += stats['ram']
+
+        # test on the validation set
+        stats, _ = test_multitask(
+            classifier, full_validset, args.batch_size,
+            preproc=preprocess_imgs, multi_heads=heads, verbose=False
+        )
+
+        valid_acc += stats['acc']
+        print("------------------------------------------")
+        print("Avg. acc: {}".format(stats['acc']))
+        print("------------------------------------------")
+
+    # Generate submission.zip
+    # directory with the code snapshot to generate the results
+    sub_dir = 'submissions/' + args.sub_dir
+    if not os.path.exists(sub_dir):
+        os.makedirs(sub_dir)
+
+    # copy code
+    create_code_snapshot(".", sub_dir + "/code_snapshot")
+
+    # generate metadata.txt with all the data used for the CLScore
+    elapsed = (time.time() - start) / 60
+    print("Training Time: {}m".format(elapsed))
+    with open(sub_dir + "/metadata.txt", "w") as wf:
+        for obj in [
+            np.average(valid_acc), elapsed, np.average(ram_usage),
+            np.max(ram_usage), np.average(ext_mem_sz), np.max(ext_mem_sz)
+        ]:
+            wf.write(str(obj) + "\n")
+    print(f'Average Accuracy Over Time on the Validation Set: {np.average(valid_acc)}')
+    print(f'Total Training/Test time: {elapsed} Minutes')
+    print(f'Average RAM Usage: {np.average(ram_usage)} MB')
+    print(f'Max RAM Usage: {np.max(ram_usage)} MB')
+
+    print("Experiment completed.")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser('CVPR Continual Learning Challenge')
+
+    # General
+    parser.add_argument('--scenario', type=str, default="multi-task-nc",
+                        choices=['ni', 'multi-task-nc', 'nic'])
+    parser.add_argument('--preload_data', type=bool, default=True,
+                        help='preload data into RAM')
+    parser.add_argument('--no_preload_data', dest='preload_data',
+                        action='store_false')
+
+    # Model
+    parser.add_argument('-cls', '--classifier', type=str, default='ResNet18Ewc',
+                        choices=['ResNet18Ewc'])
+
+    # Optimization
+    parser.add_argument('--lr', type=float, default=0.01,
+                        help='learning rate')
+    parser.add_argument('--batch_size', type=int, default=32,
+                        help='batch_size')
+    parser.add_argument('--epochs', type=int, default=1,
+                        help='number of epochs')
+
+    # Continual Learning
+    parser.add_argument('--replay_examples', type=int, default=0,
+                        help='data examples to keep in memory for each batch '
+                             'for replay.')
+
+    # Misc
+    parser.add_argument('--sub_dir', type=str, default="multi-task-nc",
+                        help='directory of the submission file for this exp.')
+
+    args = parser.parse_args()
+    args.n_classes = 50
+    args.input_size = [3, 128, 128]
+
+    args.cuda = torch.cuda.is_available()
+    args.device = 'cuda:0' if args.cuda else 'cpu'
+
+    main(args)
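`ewc.register_ewc_params(...)` above is where the Fisher information gets estimated from the preprocessed training tensors returned by `train_net_ewc`. What it stores is, roughly, the squared gradient of the mean log-likelihood of the true class; a standalone sketch on a placeholder model and batch:

```python
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import autograd

model = nn.Linear(10, 5)   # stand-in for the ResNet-18 classifier
x = torch.randn(32, 10)    # stand-in mini-batch
y = torch.randint(0, 5, (32,))

# mean log-likelihood of the true class over the batch
log_probs = F.log_softmax(model(x), dim=1)
log_likelihood = log_probs.gather(1, y.unsqueeze(1)).squeeze(1).mean()

# diagonal Fisher approximation: squared gradients of the log-likelihood
grads = autograd.grad(log_likelihood, model.parameters())
fisher = {name: g.detach() ** 2
          for (name, _), g in zip(model.named_parameters(), grads)}
print({name: f.shape for name, f in fisher.items()})
```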
diff --git a/proj.sh b/proj.sh
new file mode 100644
index 0000000..65354dc
--- /dev/null
+++ b/proj.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+conda activate clvision-challenge
+(
+python main.py --scenario="multi-task-nc" --epochs="5" --sub_dir="baseline" \
+  >> base.log &)
+(
+python main_ewc.py --scenario="multi-task-nc" --epochs="5" --sub_dir="ewc" \
+  >> ewc.log &)
+""" + +# Python 2-3 compatible +from __future__ import print_function +from __future__ import division +from __future__ import absolute_import + +import numpy as np +import torch +from torch.autograd import Variable +from .common import pad_data, shuffle_in_unison, check_ext_mem, check_ram_usage + +from ewc.elastic_weight_consolidation import ElasticWeightConsolidation +from torch.utils.data import TensorDataset + +def train_net_ewc(optimizer, ewc, criterion, mb_size, x, y, t, + train_ep, preproc=None, use_cuda=True, mask=None): + """ + (EWC Version) + Train a Pytorch model from pre-loaded tensors. + + Args: + optimizer (object): the pytorch optimizer. + model (object): the pytorch model to train. + criterion (func): loss function. + mb_size (int): mini-batch size. + x (tensor): train data. + y (tensor): train labels. + t (int): task label. + train_ep (int): number of training epochs. + preproc (func): test iterations. + use_cuda (bool): if we want to use gpu or cpu. + mask (bool): if we want to maks out some classes from the results. + Returns: + ave_loss (float): average loss across the train set. + acc (float): average accuracy over training. + stats (dict): dictionary of several stats collected. + """ + + cur_ep = 0 + cur_train_t = t + stats = {"ram": [], "disk": []} + + if preproc: + x = preproc(x) + + (train_x, train_y), it_x_ep = pad_data( + [x, y], mb_size + ) + + shuffle_in_unison( + [train_x, train_y], 0, in_place=True + ) + + criterion = ewc.crit + model = ewc.model + model = maybe_cuda(model, use_cuda=use_cuda) + acc = None + ave_loss = 0 + + train_x = torch.from_numpy(train_x).type(torch.FloatTensor) + train_y = torch.from_numpy(train_y).type(torch.LongTensor) + + for ep in range(train_ep): + + stats['disk'].append(check_ext_mem("cl_ext_mem")) + stats['ram'].append(check_ram_usage()) + + model.active_perc_list = [] + model.train() + + print("training ep: ", ep) + correct_cnt, ave_loss = 0, 0 + for it in range(it_x_ep): + + start = it * mb_size + end = (it + 1) * mb_size + + optimizer.zero_grad() + + x_mb = maybe_cuda(train_x[start:end], use_cuda=use_cuda) + y_mb = maybe_cuda(train_y[start:end], use_cuda=use_cuda) + logits = model(x_mb) + + _, pred_label = torch.max(logits, 1) + correct_cnt += (pred_label == y_mb).sum() + loss = criterion(logits, y_mb) + ewc._compute_consolidation_loss(ewc.weight) + ave_loss += loss.item() + + loss.backward() + optimizer.step() + + acc = correct_cnt.item() / \ + ((it + 1) * y_mb.size(0)) + ave_loss /= ((it + 1) * y_mb.size(0)) + + if it % 100 == 0: + print( + '==>>> it: {}, avg. loss: {:.6f}, ' + 'running train acc: {:.3f}' + .format(it, ave_loss, acc) + ) + + cur_ep += 1 + + return ave_loss, acc, stats, TensorDataset(maybe_cuda(train_x, use_cuda=use_cuda), + maybe_cuda(train_y, use_cuda=use_cuda)) + + +def preprocess_imgs(img_batch, scale=True, norm=True, channel_first=True): + """ + Here we get a batch of PIL imgs and we return them normalized as for + the pytorch pre-trained models. + + Args: + img_batch (tensor): batch of images. + scale (bool): if we want to scale the images between 0 an 1. + channel_first (bool): if the channel dimension is before of after + the other dimensions (width and height). + norm (bool): if we want to normalize them. + Returns: + tensor: pre-processed batch. 
+
+
+def maybe_cuda(what, use_cuda=True, **kw):
+    """
+    Moves `what` to CUDA and returns it, if `use_cuda` and it's available.
+
+    Args:
+        what (object): any object to possibly move to the gpu.
+        use_cuda (bool): if we want to use gpu or cpu.
+    Returns:
+        object: the same object, possibly moved to the gpu.
+    """
+
+    if use_cuda is not False and torch.cuda.is_available():
+        what = what.cuda()
+    return what
+
+
+def test_multitask(
+        model, test_set, mb_size, preproc=None, use_cuda=True, multi_heads=[], verbose=True):
+    """
+    Test a model considering that the test set is composed of multiple tests,
+    one for each task.
+
+    Args:
+        model (nn.Module): the pytorch model to test.
+        test_set (list): list of (x, y, t) test tuples.
+        mb_size (int): mini-batch size.
+        preproc (func): image preprocessing function.
+        use_cuda (bool): if we want to use gpu or cpu.
+        multi_heads (list): ordered list of "heads" to be used for each
+            task.
+    Returns:
+        stats (dict): collected stats of the test, including average and
+            per-task accuracies.
+        preds (list): predictions for every test pattern.
+    """
+
+    model.eval()
+
+    acc_x_task = []
+    stats = {'accs': [], 'acc': []}
+    preds = []
+
+    for (x, y), t in test_set:
+
+        if preproc:
+            x = preproc(x)
+
+        if multi_heads != [] and len(multi_heads) > t:
+            # we can use the stored head
+            if verbose:
+                print("Using head: ", t)
+            with torch.no_grad():
+                model.fc.weight.copy_(multi_heads[t].weight)
+                model.fc.bias.copy_(multi_heads[t].bias)
+
+        model = maybe_cuda(model, use_cuda=use_cuda)
+        acc = None
+
+        test_x = torch.from_numpy(x).type(torch.FloatTensor)
+        test_y = torch.from_numpy(y).type(torch.LongTensor)
+
+        correct_cnt, ave_loss = 0, 0
+
+        with torch.no_grad():
+
+            iters = test_y.size(0) // mb_size + 1
+            for it in range(iters):
+
+                start = it * mb_size
+                end = (it + 1) * mb_size
+
+                x_mb = maybe_cuda(test_x[start:end], use_cuda=use_cuda)
+                y_mb = maybe_cuda(test_y[start:end], use_cuda=use_cuda)
+                logits = model(x_mb)
+
+                _, pred_label = torch.max(logits, 1)
+                correct_cnt += (pred_label == y_mb).sum()
+                preds += list(pred_label.data.cpu().numpy())
+
+                # print(pred_label)
+                # print(y_mb)
+            acc = correct_cnt.item() / test_y.shape[0]
+
+        if verbose:
+            print('TEST Acc. Task {}==>>> acc: {:.3f}'.format(t, acc))
+        acc_x_task.append(acc)
+        stats['accs'].append(acc)
+
+    stats['acc'].append(np.mean(acc_x_task))
+
+    if verbose:
+        print("------------------------------------------")
+        print("Avg. acc:", stats['acc'])
+        print("------------------------------------------")
+
+    # reset the head for the next batch
+    if multi_heads:
+        if verbose:
+            print("classifier reset...")
+        with torch.no_grad():
+            model.fc.weight.fill_(0)
+            model.fc.bias.fill_(0)
+
+    return stats, preds
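The multi-head handling in `test_multitask` (together with the `heads.append(copy.deepcopy(classifier.fc))` calls in both main scripts) amounts to swapping the final linear layer per task. A minimal sketch with a placeholder backbone output:

```python
import copy
import torch
import torch.nn as nn

backbone_out, n_classes = 512, 50
fc = nn.Linear(backbone_out, n_classes)

# during training: store a copy of the head after each task
heads = [copy.deepcopy(fc) for _ in range(3)]  # pretend we trained on 3 tasks

# during evaluation: restore the head that belongs to task t before testing
t = 1
with torch.no_grad():
    fc.weight.copy_(heads[t].weight)
    fc.bias.copy_(heads[t].bias)

features = torch.randn(8, backbone_out)  # stand-in for ResNet-18 features
print(fc(features).argmax(dim=1))
```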