diff --git a/.circleci/config.yml b/.circleci/config.yml index 5dbacab5cb..e97dc70a65 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -13,7 +13,7 @@ jobs: steps: - checkout - restore_cache: - key: v2-<< parameters.version >>-min-dependency-cache-{{ checksum "dev-requirements.txt" }} + key: v3-<< parameters.version >>-min-dependency-cache-{{ checksum "dev-requirements.txt" }} - run: name: install python dependencies command: | @@ -22,7 +22,7 @@ jobs: pip install --upgrade pip pip install -r dev-requirements.txt - save_cache: - key: v2-<< parameters.version >>-min-dependency-cache-{{ checksum "dev-requirements.txt" }} + key: v3-<< parameters.version >>-min-dependency-cache-{{ checksum "dev-requirements.txt" }} paths: - "venv" - run: @@ -45,7 +45,7 @@ jobs: steps: - checkout - restore_cache: - key: v2-3.9-dependency-cache-{{ checksum "dev-requirements.txt" }} + key: v3-3.9-dependency-cache-{{ checksum "dev-requirements.txt" }} - run: name: install python dependencies command: | @@ -54,7 +54,7 @@ jobs: pip install -r dev-requirements.txt pip install -e . - save_cache: - key: v2-3.9-dependency-cache-{{ checksum "dev-requirements.txt" }} + key: v3-3.9-dependency-cache-{{ checksum "dev-requirements.txt" }} paths: - "venv" - run: @@ -82,7 +82,7 @@ jobs: steps: - checkout - restore_cache: - key: v2-3.9-dependency-cache-{{ checksum "dev-requirements.txt" }} + key: v3-3.9-dependency-cache-{{ checksum "dev-requirements.txt" }} - run: name: install python dependencies command: | @@ -91,7 +91,7 @@ jobs: pip install -r dev-requirements.txt pip install -e . 
- save_cache: - key: v2-3.9-dependency-cache-{{ checksum "dev-requirements.txt" }} + key: v3-3.9-dependency-cache-{{ checksum "dev-requirements.txt" }} paths: - "venv" - run: diff --git a/benchmarks/cifar_exp/experiment_varying_task_sample.py b/benchmarks/cifar_exp/experiment_varying_task_sample.py index ffcc33be9d..ba9df30d78 100644 --- a/benchmarks/cifar_exp/experiment_varying_task_sample.py +++ b/benchmarks/cifar_exp/experiment_varying_task_sample.py @@ -1,474 +1,474 @@ -#%% -import random -import matplotlib.pyplot as plt -import tensorflow as tf -import keras -from keras import layers -from itertools import product -import pandas as pd - -import numpy as np -import pickle - -from sklearn.model_selection import StratifiedKFold -from math import log2, ceil - -import sys -from joblib import Parallel, delayed -from multiprocessing import Pool - -from proglearn import ProgressiveLearner -from proglearn import SimpleArgmaxAverage -from proglearn import NeuralClassificationTransformer, TreeClassificationTransformer -from proglearn import TreeClassificationVoter, KNNClassificationVoter - -import tensorflow as tf -from numbers import Number -from collections import Set, Mapping, deque - -import time - -#%% -def unpickle(file): - with open(file, "rb") as fo: - dict = pickle.load(fo, encoding="bytes") - return dict - - -try: # Python 2 - zero_depth_bases = (basestring, Number, xrange, bytearray) - iteritems = "iteritems" -except NameError: # Python 3 - zero_depth_bases = (str, bytes, Number, range, bytearray) - iteritems = "items" - - -def getsize(obj_0): - """Recursively iterate to sum size of object & members.""" - _seen_ids = set() - - def inner(obj): - obj_id = id(obj) - if obj_id in _seen_ids: - return 0 - _seen_ids.add(obj_id) - size = sys.getsizeof(obj) - if isinstance(obj, zero_depth_bases): - pass # bypass remaining control flow and return - elif isinstance(obj, (tuple, list, Set, deque)): - size += sum(inner(i) for i in obj) - elif isinstance(obj, Mapping) or 
hasattr(obj, iteritems): - size += sum(inner(k) + inner(v) for k, v in getattr(obj, iteritems)()) - # Check for custom object instances - may subclass above too - if hasattr(obj, "__dict__"): - size += inner(vars(obj)) - if hasattr(obj, "__slots__"): # can have __slots__ with __dict__ - size += sum( - inner(getattr(obj, s)) for s in obj.__slots__ if hasattr(obj, s) - ) - return size - - return inner(obj_0) - - -#%% -def LF_experiment( - train_x, - train_y, - test_x, - test_y, - ntrees, - shift, - slot, - model, - num_points_per_task, - acorn=None, -): - - df = pd.DataFrame() - single_task_accuracies = np.zeros(10, dtype=float) - shifts = [] - tasks = [] - base_tasks = [] - accuracies_across_tasks = [] - train_times_across_tasks = [] - single_task_inference_times_across_tasks = [] - multitask_inference_times_across_tasks = [] - model_size = [] - - if model == "dnn": - default_transformer_class = NeuralClassificationTransformer - - network = keras.Sequential() - network.add( - layers.Conv2D( - filters=16, - kernel_size=(3, 3), - activation="relu", - input_shape=np.shape(train_x)[1:], - ) - ) - network.add(layers.BatchNormalization()) - network.add( - layers.Conv2D( - filters=32, - kernel_size=(3, 3), - strides=2, - padding="same", - activation="relu", - ) - ) - network.add(layers.BatchNormalization()) - network.add( - layers.Conv2D( - filters=64, - kernel_size=(3, 3), - strides=2, - padding="same", - activation="relu", - ) - ) - network.add(layers.BatchNormalization()) - network.add( - layers.Conv2D( - filters=128, - kernel_size=(3, 3), - strides=2, - padding="same", - activation="relu", - ) - ) - network.add(layers.BatchNormalization()) - network.add( - layers.Conv2D( - filters=254, - kernel_size=(3, 3), - strides=2, - padding="same", - activation="relu", - ) - ) - - network.add(layers.Flatten()) - network.add(layers.BatchNormalization()) - network.add(layers.Dense(2000, activation="relu")) - network.add(layers.BatchNormalization()) - network.add(layers.Dense(2000, 
activation="relu")) - network.add(layers.BatchNormalization()) - network.add(layers.Dense(units=10, activation="softmax")) - - default_transformer_kwargs = { - "network": network, - "euclidean_layer_idx": -2, - "num_classes": 10, - "optimizer": keras.optimizers.Adam(3e-4), - } - - default_voter_class = KNNClassificationVoter - default_voter_kwargs = {"k": int(np.log2(num_points_per_task))} - - default_decider_class = SimpleArgmaxAverage - elif model == "uf": - default_transformer_class = TreeClassificationTransformer - default_transformer_kwargs = { - "kwargs": {"max_depth": None, "max_features": "auto"} - } - - default_voter_class = TreeClassificationVoter - default_voter_kwargs = {} - - default_decider_class = SimpleArgmaxAverage - - progressive_learner = ProgressiveLearner( - default_transformer_class=default_transformer_class, - default_transformer_kwargs=default_transformer_kwargs, - default_voter_class=default_voter_class, - default_voter_kwargs=default_voter_kwargs, - default_decider_class=default_decider_class, - ) - - for task_ii in range(10): - print("Starting Task {} For Fold {}".format(task_ii, shift)) - if acorn is not None: - np.random.seed(acorn) - - reduced_sample_no = int(num_points_per_task * (1.29**task_ii)) - - print(reduced_sample_no) - - train_start_time = time.time() - progressive_learner.add_task( - X=train_x[ - task_ii * 5000 - + slot * reduced_sample_no : task_ii * 5000 - + (slot + 1) * reduced_sample_no - ], - y=train_y[ - task_ii * 5000 - + slot * reduced_sample_no : task_ii * 5000 - + (slot + 1) * reduced_sample_no - ], - num_transformers=1 if model == "dnn" else ntrees, - transformer_voter_decider_split=[0.67, 0.33, 0], - decider_kwargs={ - "classes": np.unique( - train_y[ - task_ii * 5000 - + slot * reduced_sample_no : task_ii * 5000 - + (slot + 1) * reduced_sample_no - ] - ) - }, - ) - train_end_time = time.time() - - train_times_across_tasks.append(train_end_time - train_start_time) - model_size.append(getsize(progressive_learner)) 
- print(model_size) - - single_task_inference_start_time = time.time() - llf_task = progressive_learner.predict( - X=test_x[task_ii * 1000 : (task_ii + 1) * 1000, :], - transformer_ids=[task_ii], - task_id=task_ii, - ) - single_task_inference_end_time = time.time() - single_task_accuracies[task_ii] = np.mean( - llf_task == test_y[task_ii * 1000 : (task_ii + 1) * 1000] - ) - single_task_inference_times_across_tasks.append( - single_task_inference_end_time - single_task_inference_start_time - ) - - for task_jj in range(task_ii + 1): - multitask_inference_start_time = time.time() - llf_task = progressive_learner.predict( - X=test_x[task_jj * 1000 : (task_jj + 1) * 1000, :], task_id=task_jj - ) - multitask_inference_end_time = time.time() - - shifts.append(shift) - tasks.append(task_jj + 1) - base_tasks.append(task_ii + 1) - accuracies_across_tasks.append( - np.mean(llf_task == test_y[task_jj * 1000 : (task_jj + 1) * 1000]) - ) - multitask_inference_times_across_tasks.append( - multitask_inference_end_time - multitask_inference_start_time - ) - - df["data_fold"] = shifts - df["task"] = tasks - df["base_task"] = base_tasks - df["accuracy"] = accuracies_across_tasks - df["multitask_inference_times"] = multitask_inference_times_across_tasks - - df_single_task = pd.DataFrame() - df_single_task["task"] = range(1, 11) - df_single_task["data_fold"] = shift - df_single_task["accuracy"] = single_task_accuracies - df_single_task[ - "single_task_inference_times" - ] = single_task_inference_times_across_tasks - df_single_task["train_times"] = train_times_across_tasks - df_single_task["model_size"] = model_size - - summary = (df, df_single_task) - file_to_save = ( - "result/result/increased_sample_" - + model - + str(ntrees) - + "_" - + str(shift) - + "_" - + str(slot) - + ".pickle" - ) - with open(file_to_save, "wb") as f: - pickle.dump(summary, f) - - -#%% -def cross_val_data(data_x, data_y, num_points_per_task, total_task=10, shift=1): - x = data_x.copy() - y = data_y.copy() - 
idx = [np.where(data_y == u)[0] for u in np.unique(data_y)] - - batch_per_task = 5000 // num_points_per_task - sample_per_class = num_points_per_task // total_task - test_data_slot = 100 // batch_per_task - - for task in range(total_task): - for batch in range(batch_per_task): - for class_no in range(task * 10, (task + 1) * 10, 1): - indx = np.roll(idx[class_no], (shift - 1) * 100) - - if batch == 0 and class_no == 0 and task == 0: - train_x = x[ - indx[batch * sample_per_class : (batch + 1) * sample_per_class], - :, - ] - train_y = y[ - indx[batch * sample_per_class : (batch + 1) * sample_per_class] - ] - test_x = x[ - indx[ - batch * test_data_slot - + 500 : (batch + 1) * test_data_slot - + 500 - ], - :, - ] - test_y = y[ - indx[ - batch * test_data_slot - + 500 : (batch + 1) * test_data_slot - + 500 - ] - ] - else: - train_x = np.concatenate( - ( - train_x, - x[ - indx[ - batch - * sample_per_class : (batch + 1) - * sample_per_class - ], - :, - ], - ), - axis=0, - ) - train_y = np.concatenate( - ( - train_y, - y[ - indx[ - batch - * sample_per_class : (batch + 1) - * sample_per_class - ] - ], - ), - axis=0, - ) - test_x = np.concatenate( - ( - test_x, - x[ - indx[ - batch * test_data_slot - + 500 : (batch + 1) * test_data_slot - + 500 - ], - :, - ], - ), - axis=0, - ) - test_y = np.concatenate( - ( - test_y, - y[ - indx[ - batch * test_data_slot - + 500 : (batch + 1) * test_data_slot - + 500 - ] - ], - ), - axis=0, - ) - - return train_x, train_y, test_x, test_y - - -#%% -def run_parallel_exp( - data_x, data_y, n_trees, model, num_points_per_task, slot=0, shift=1 -): - train_x, train_y, test_x, test_y = cross_val_data(data_x, data_y, 500, shift=shift) - - if model == "dnn": - config = tf.compat.v1.ConfigProto() - config.gpu_options.allow_growth = True - sess = tf.compat.v1.Session(config=config) - with tf.device("/gpu:" + str(shift % 4)): - LF_experiment( - train_x, - train_y, - test_x, - test_y, - n_trees, - shift, - slot, - model, - num_points_per_task, - 
acorn=12345, - ) - else: - LF_experiment( - train_x, - train_y, - test_x, - test_y, - n_trees, - shift, - slot, - model, - num_points_per_task, - acorn=12345, - ) - - -#%% -### MAIN HYPERPARAMS ### -model = "dnn" -num_points_per_task = 500 -total_slots = 1 # 5000//num_points_per_task -########################### - -(X_train, y_train), (X_test, y_test) = keras.datasets.cifar100.load_data() -data_x = np.concatenate([X_train, X_test]) -if model == "uf": - data_x = data_x.reshape( - (data_x.shape[0], data_x.shape[1] * data_x.shape[2] * data_x.shape[3]) - ) -data_y = np.concatenate([y_train, y_test]) -data_y = data_y[:, 0] - - -#%% -if model == "uf": - slot_fold = range(total_slots) - shift_fold = range(1, 7, 1) - n_trees = [10] - iterable = product(n_trees, shift_fold, slot_fold) - Parallel(n_jobs=-2, verbose=1)( - delayed(run_parallel_exp)( - data_x, data_y, ntree, model, num_points_per_task, slot=slot, shift=shift - ) - for ntree, shift, slot in iterable - ) -elif model == "dnn": - slot_fold = range(total_slots) - - def perform_shift(shift_slot_tuple): - shift, slot = shift_slot_tuple - return run_parallel_exp( - data_x, data_y, 0, model, num_points_per_task, slot=slot, shift=shift - ) - - print("Performing Stage 1 Shifts") - stage_1_shifts = range(1, 5) - stage_1_iterable = product(stage_1_shifts, slot_fold) - with Pool(4) as p: - p.map(perform_shift, stage_1_iterable) - - print("Performing Stage 2 Shifts") - stage_2_shifts = range(5, 7) - stage_2_iterable = product(stage_2_shifts, slot_fold) - with Pool(4) as p: - p.map(perform_shift, stage_2_iterable) +#%% +import random +import matplotlib.pyplot as plt +import tensorflow as tf +import keras +from keras import layers +from itertools import product +import pandas as pd + +import numpy as np +import pickle + +from sklearn.model_selection import StratifiedKFold +from math import log2, ceil + +import sys +from joblib import Parallel, delayed +from multiprocessing import Pool + +from proglearn import ProgressiveLearner 
+from proglearn import SimpleArgmaxAverage +from proglearn import NeuralClassificationTransformer, TreeClassificationTransformer +from proglearn import TreeClassificationVoter, KNNClassificationVoter + +import tensorflow as tf +from numbers import Number +from collections import Set, Mapping, deque + +import time + +#%% +def unpickle(file): + with open(file, "rb") as fo: + dict = pickle.load(fo, encoding="bytes") + return dict + + +try: # Python 2 + zero_depth_bases = (basestring, Number, xrange, bytearray) + iteritems = "iteritems" +except NameError: # Python 3 + zero_depth_bases = (str, bytes, Number, range, bytearray) + iteritems = "items" + + +def getsize(obj_0): + """Recursively iterate to sum size of object & members.""" + _seen_ids = set() + + def inner(obj): + obj_id = id(obj) + if obj_id in _seen_ids: + return 0 + _seen_ids.add(obj_id) + size = sys.getsizeof(obj) + if isinstance(obj, zero_depth_bases): + pass # bypass remaining control flow and return + elif isinstance(obj, (tuple, list, Set, deque)): + size += sum(inner(i) for i in obj) + elif isinstance(obj, Mapping) or hasattr(obj, iteritems): + size += sum(inner(k) + inner(v) for k, v in getattr(obj, iteritems)()) + # Check for custom object instances - may subclass above too + if hasattr(obj, "__dict__"): + size += inner(vars(obj)) + if hasattr(obj, "__slots__"): # can have __slots__ with __dict__ + size += sum( + inner(getattr(obj, s)) for s in obj.__slots__ if hasattr(obj, s) + ) + return size + + return inner(obj_0) + + +#%% +def LF_experiment( + train_x, + train_y, + test_x, + test_y, + ntrees, + shift, + slot, + model, + num_points_per_task, + acorn=None, +): + + df = pd.DataFrame() + single_task_accuracies = np.zeros(10, dtype=float) + shifts = [] + tasks = [] + base_tasks = [] + accuracies_across_tasks = [] + train_times_across_tasks = [] + single_task_inference_times_across_tasks = [] + multitask_inference_times_across_tasks = [] + model_size = [] + + if model == "dnn": + 
default_transformer_class = NeuralClassificationTransformer + + network = keras.Sequential() + network.add( + layers.Conv2D( + filters=16, + kernel_size=(3, 3), + activation="relu", + input_shape=np.shape(train_x)[1:], + ) + ) + network.add(layers.BatchNormalization()) + network.add( + layers.Conv2D( + filters=32, + kernel_size=(3, 3), + strides=2, + padding="same", + activation="relu", + ) + ) + network.add(layers.BatchNormalization()) + network.add( + layers.Conv2D( + filters=64, + kernel_size=(3, 3), + strides=2, + padding="same", + activation="relu", + ) + ) + network.add(layers.BatchNormalization()) + network.add( + layers.Conv2D( + filters=128, + kernel_size=(3, 3), + strides=2, + padding="same", + activation="relu", + ) + ) + network.add(layers.BatchNormalization()) + network.add( + layers.Conv2D( + filters=254, + kernel_size=(3, 3), + strides=2, + padding="same", + activation="relu", + ) + ) + + network.add(layers.Flatten()) + network.add(layers.BatchNormalization()) + network.add(layers.Dense(2000, activation="relu")) + network.add(layers.BatchNormalization()) + network.add(layers.Dense(2000, activation="relu")) + network.add(layers.BatchNormalization()) + network.add(layers.Dense(units=10, activation="softmax")) + + default_transformer_kwargs = { + "network": network, + "euclidean_layer_idx": -2, + "num_classes": 10, + "optimizer": keras.optimizers.Adam(3e-4), + } + + default_voter_class = KNNClassificationVoter + default_voter_kwargs = {"k": int(np.log2(num_points_per_task))} + + default_decider_class = SimpleArgmaxAverage + elif model == "uf": + default_transformer_class = TreeClassificationTransformer + default_transformer_kwargs = { + "kwargs": {"max_depth": None, "max_features": "auto"} + } + + default_voter_class = TreeClassificationVoter + default_voter_kwargs = {} + + default_decider_class = SimpleArgmaxAverage + + progressive_learner = ProgressiveLearner( + default_transformer_class=default_transformer_class, + 
default_transformer_kwargs=default_transformer_kwargs, + default_voter_class=default_voter_class, + default_voter_kwargs=default_voter_kwargs, + default_decider_class=default_decider_class, + ) + + for task_ii in range(10): + print("Starting Task {} For Fold {}".format(task_ii, shift)) + if acorn is not None: + np.random.seed(acorn) + + reduced_sample_no = int(num_points_per_task * (1.29**task_ii)) + + print(reduced_sample_no) + + train_start_time = time.time() + progressive_learner.add_task( + X=train_x[ + task_ii * 5000 + + slot * reduced_sample_no : task_ii * 5000 + + (slot + 1) * reduced_sample_no + ], + y=train_y[ + task_ii * 5000 + + slot * reduced_sample_no : task_ii * 5000 + + (slot + 1) * reduced_sample_no + ], + num_transformers=1 if model == "dnn" else ntrees, + transformer_voter_decider_split=[0.67, 0.33, 0], + decider_kwargs={ + "classes": np.unique( + train_y[ + task_ii * 5000 + + slot * reduced_sample_no : task_ii * 5000 + + (slot + 1) * reduced_sample_no + ] + ) + }, + ) + train_end_time = time.time() + + train_times_across_tasks.append(train_end_time - train_start_time) + model_size.append(getsize(progressive_learner)) + print(model_size) + + single_task_inference_start_time = time.time() + llf_task = progressive_learner.predict( + X=test_x[task_ii * 1000 : (task_ii + 1) * 1000, :], + transformer_ids=[task_ii], + task_id=task_ii, + ) + single_task_inference_end_time = time.time() + single_task_accuracies[task_ii] = np.mean( + llf_task == test_y[task_ii * 1000 : (task_ii + 1) * 1000] + ) + single_task_inference_times_across_tasks.append( + single_task_inference_end_time - single_task_inference_start_time + ) + + for task_jj in range(task_ii + 1): + multitask_inference_start_time = time.time() + llf_task = progressive_learner.predict( + X=test_x[task_jj * 1000 : (task_jj + 1) * 1000, :], task_id=task_jj + ) + multitask_inference_end_time = time.time() + + shifts.append(shift) + tasks.append(task_jj + 1) + base_tasks.append(task_ii + 1) + 
accuracies_across_tasks.append( + np.mean(llf_task == test_y[task_jj * 1000 : (task_jj + 1) * 1000]) + ) + multitask_inference_times_across_tasks.append( + multitask_inference_end_time - multitask_inference_start_time + ) + + df["data_fold"] = shifts + df["task"] = tasks + df["base_task"] = base_tasks + df["accuracy"] = accuracies_across_tasks + df["multitask_inference_times"] = multitask_inference_times_across_tasks + + df_single_task = pd.DataFrame() + df_single_task["task"] = range(1, 11) + df_single_task["data_fold"] = shift + df_single_task["accuracy"] = single_task_accuracies + df_single_task[ + "single_task_inference_times" + ] = single_task_inference_times_across_tasks + df_single_task["train_times"] = train_times_across_tasks + df_single_task["model_size"] = model_size + + summary = (df, df_single_task) + file_to_save = ( + "result/result/increased_sample_" + + model + + str(ntrees) + + "_" + + str(shift) + + "_" + + str(slot) + + ".pickle" + ) + with open(file_to_save, "wb") as f: + pickle.dump(summary, f) + + +#%% +def cross_val_data(data_x, data_y, num_points_per_task, total_task=10, shift=1): + x = data_x.copy() + y = data_y.copy() + idx = [np.where(data_y == u)[0] for u in np.unique(data_y)] + + batch_per_task = 5000 // num_points_per_task + sample_per_class = num_points_per_task // total_task + test_data_slot = 100 // batch_per_task + + for task in range(total_task): + for batch in range(batch_per_task): + for class_no in range(task * 10, (task + 1) * 10, 1): + indx = np.roll(idx[class_no], (shift - 1) * 100) + + if batch == 0 and class_no == 0 and task == 0: + train_x = x[ + indx[batch * sample_per_class : (batch + 1) * sample_per_class], + :, + ] + train_y = y[ + indx[batch * sample_per_class : (batch + 1) * sample_per_class] + ] + test_x = x[ + indx[ + batch * test_data_slot + + 500 : (batch + 1) * test_data_slot + + 500 + ], + :, + ] + test_y = y[ + indx[ + batch * test_data_slot + + 500 : (batch + 1) * test_data_slot + + 500 + ] + ] + else: + 
train_x = np.concatenate( + ( + train_x, + x[ + indx[ + batch + * sample_per_class : (batch + 1) + * sample_per_class + ], + :, + ], + ), + axis=0, + ) + train_y = np.concatenate( + ( + train_y, + y[ + indx[ + batch + * sample_per_class : (batch + 1) + * sample_per_class + ] + ], + ), + axis=0, + ) + test_x = np.concatenate( + ( + test_x, + x[ + indx[ + batch * test_data_slot + + 500 : (batch + 1) * test_data_slot + + 500 + ], + :, + ], + ), + axis=0, + ) + test_y = np.concatenate( + ( + test_y, + y[ + indx[ + batch * test_data_slot + + 500 : (batch + 1) * test_data_slot + + 500 + ] + ], + ), + axis=0, + ) + + return train_x, train_y, test_x, test_y + + +#%% +def run_parallel_exp( + data_x, data_y, n_trees, model, num_points_per_task, slot=0, shift=1 +): + train_x, train_y, test_x, test_y = cross_val_data(data_x, data_y, 500, shift=shift) + + if model == "dnn": + config = tf.compat.v1.ConfigProto() + config.gpu_options.allow_growth = True + sess = tf.compat.v1.Session(config=config) + with tf.device("/gpu:" + str(shift % 4)): + LF_experiment( + train_x, + train_y, + test_x, + test_y, + n_trees, + shift, + slot, + model, + num_points_per_task, + acorn=12345, + ) + else: + LF_experiment( + train_x, + train_y, + test_x, + test_y, + n_trees, + shift, + slot, + model, + num_points_per_task, + acorn=12345, + ) + + +#%% +### MAIN HYPERPARAMS ### +model = "dnn" +num_points_per_task = 500 +total_slots = 1 # 5000//num_points_per_task +########################### + +(X_train, y_train), (X_test, y_test) = keras.datasets.cifar100.load_data() +data_x = np.concatenate([X_train, X_test]) +if model == "uf": + data_x = data_x.reshape( + (data_x.shape[0], data_x.shape[1] * data_x.shape[2] * data_x.shape[3]) + ) +data_y = np.concatenate([y_train, y_test]) +data_y = data_y[:, 0] + + +#%% +if model == "uf": + slot_fold = range(total_slots) + shift_fold = range(1, 7, 1) + n_trees = [10] + iterable = product(n_trees, shift_fold, slot_fold) + Parallel(n_jobs=-2, verbose=1)( + 
delayed(run_parallel_exp)( + data_x, data_y, ntree, model, num_points_per_task, slot=slot, shift=shift + ) + for ntree, shift, slot in iterable + ) +elif model == "dnn": + slot_fold = range(total_slots) + + def perform_shift(shift_slot_tuple): + shift, slot = shift_slot_tuple + return run_parallel_exp( + data_x, data_y, 0, model, num_points_per_task, slot=slot, shift=shift + ) + + print("Performing Stage 1 Shifts") + stage_1_shifts = range(1, 5) + stage_1_iterable = product(stage_1_shifts, slot_fold) + with Pool(4) as p: + p.map(perform_shift, stage_1_iterable) + + print("Performing Stage 2 Shifts") + stage_2_shifts = range(5, 7) + stage_2_iterable = product(stage_2_shifts, slot_fold) + with Pool(4) as p: + p.map(perform_shift, stage_2_iterable) diff --git a/docs/experiments.rst b/docs/experiments.rst index 94e619049d..69c80990ba 100644 --- a/docs/experiments.rst +++ b/docs/experiments.rst @@ -16,6 +16,7 @@ The following experiments illustrate specific tests using the ``ProgLearn`` pack experiments/recruitment_across_datasets experiments/spiral_exp experiments/spoken_digit_exp + experiments/streaming_forest_tutorial experiments/xor_rxor_exp experiments/xor_rxor_with_cpd experiments/xor_rxor_with_icp diff --git a/docs/experiments/functions/streaming_spirals_functions.py b/docs/experiments/functions/streaming_spirals_functions.py new file mode 100644 index 0000000000..59d945c1a3 --- /dev/null +++ b/docs/experiments/functions/streaming_spirals_functions.py @@ -0,0 +1,589 @@ +from proglearn.forest import LifelongClassificationForest +from sdtf import StreamDecisionForest +import matplotlib.pyplot as plt +from matplotlib.ticker import ScalarFormatter +import numpy as np +import seaborn as sns +from joblib import Parallel, delayed +from proglearn.sims import generate_spirals +from math import ceil, log2 + + +def plot_spirals(spiral1, y_spiral1, num_spirals1, spiral2, y_spiral2, num_spirals2): + """ + plots spiral 1 and spiral 2 + """ + colors = sns.color_palette("Dark2", 
n_colors=5) + + fig, ax = plt.subplots(1, 2, figsize=(16, 8)) + + clr = [colors[i] for i in y_spiral1] + ax[0].scatter(spiral1[:, 0], spiral1[:, 1], c=clr, s=50) + ax[0].set_xticks([]) + ax[0].set_yticks([]) + ax[0].set_title(str(num_spirals1) + " spirals", fontsize=30) + ax[0].axis("off") + + clr = [colors[i] for i in y_spiral2] + ax[1].scatter(spiral2[:, 0], spiral2[:, 1], c=clr, s=50) + ax[1].set_xticks([]) + ax[1].set_yticks([]) + ax[1].set_title(str(num_spirals2) + " spirals", fontsize=30) + ax[1].axis("off") + + +def run_spiral_experiments(mc_rep): + """ + A function to run the spirals experiment in streaming and batch settings + """ + # generate all results + stream_spiral_errors = run_experiment_stream(mc_rep) + batch_spiral_error, batch_spiral_le = run_batch_experiment(mc_rep) + # format for plotting + streaming_spiral_errors, streaming_spiral_efficiencies = get_learning_efficiencies( + stream_spiral_errors + ) + spiral_results = [ + streaming_spiral_errors, + streaming_spiral_efficiencies, + batch_spiral_error, + batch_spiral_le, + ] + + return spiral_results + + +def run_experiment_stream(mc_rep): + mean_errors = experiment_stream() + for i in range(mc_rep - 1): + mean_errors += experiment_stream() + mean_errors = mean_errors / mc_rep + return mean_errors + + +def get_learning_efficiencies(stream_errors): + stream_synf_FLE = stream_errors[2, :] / stream_errors[6, :] + sdf_FLE = stream_errors[5, :] / stream_errors[7, :] + stream_synf_BLE = stream_errors[0, :] / stream_errors[1, :] + sdf_BLE = stream_errors[3, :] / stream_errors[4, :] + errors = [stream_errors[1], stream_errors[6], stream_errors[4], stream_errors[7]] + learning_efficiencies = [stream_synf_FLE, sdf_FLE, stream_synf_BLE, sdf_BLE] + return errors, learning_efficiencies + + +def experiment_stream( + n_task1=750, + n_task2=750, + n_update=25, + n_test=1000, + n_trees=10, +): + """ + A function to do stream SynF and stream decision forest experiment + between two tasks where the data is 
generated using generate spirals. + """ + errors = np.zeros((8, ((n_task1 + n_task2) // n_update) - 1), dtype=float) + + # instantiate classifiers + synf_single_task_t1 = LifelongClassificationForest(default_n_estimators=n_trees) + synf_multi_task = LifelongClassificationForest(default_n_estimators=n_trees) + synf_single_task_t2 = LifelongClassificationForest(default_n_estimators=n_trees) + sdf_single_task_t1 = StreamDecisionForest(n_estimators=n_trees) + sdf_multi_task = StreamDecisionForest(n_estimators=n_trees) + sdf_single_task_t2 = StreamDecisionForest(n_estimators=n_trees) + + # generate initial data for add_task + x1, y1 = generate_spirals(n_update, 3, noise=0.8) + x2, y2 = generate_spirals(n_update, 5, noise=0.4) + x1_test, y1_test = generate_spirals(n_test, 3, noise=0.8) + x2_test, y2_test = generate_spirals(n_test, 5, noise=0.4) + + # add tasks to progressive learners/decision forests + synf_single_task_t1.add_task(x1, y1, task_id=0, classes=[0, 1, 2]) + synf_multi_task.add_task(x1, y1, task_id=0, classes=[0, 1, 2]) + sdf_single_task_t1.partial_fit(x1, y1, classes=[0, 1, 2, 3, 4]) + sdf_multi_task.partial_fit(x1, y1, classes=[0, 1, 2, 3, 4]) + + # updating task 1 + for i in range(n_task1 // n_update - 1): + x, y = generate_spirals(n_update, 3, noise=0.8) + synf_single_task_t1.update_task(x, y, task_id=0) + synf_multi_task.update_task(x, y, task_id=0) + sdf_single_task_t1.partial_fit(x, y) + sdf_multi_task.partial_fit(x, y) + synf_t1_y_hat = synf_single_task_t1.predict(x1_test, task_id=0) + synf_multi_y_hat = synf_multi_task.predict(x1_test, task_id=0) + sdf_t1_y_hat = sdf_single_task_t1.predict(x1_test) + sdf_multi_y_hat = sdf_multi_task.predict(x1_test) + errors[0, i] = 1 - np.mean(synf_t1_y_hat == y1_test) # synf single task, t1 + errors[1, i] = 1 - np.mean(synf_multi_y_hat == y1_test) # synf multi task t1 + errors[2, i] = 0.2 # synf single task, t2 + errors[3, i] = 1 - np.mean(sdf_t1_y_hat == y1_test) # sdf single task, t1 + errors[4, i] = 1 - 
np.mean(sdf_multi_y_hat == y1_test) # sdf multi task, t1 + errors[5, i] = 0.2 # sdf single task, t2 + errors[6, i] = 0.2 # synf multi task, t2 + errors[7, i] = 0.2 # sdf multi task, t2 + + idx = (n_task1 // n_update) - 1 + # updating task 2 + synf_multi_task.add_task(x2, y2, task_id=1, classes=[0, 1, 2, 3, 4]) + synf_single_task_t2.add_task(x2, y2, task_id=1, classes=[0, 1, 2, 3, 4]) + sdf_single_task_t2.partial_fit(x2, y2, classes=[0, 1, 2, 3, 4]) + sdf_multi_task.partial_fit(x2, y2, classes=[0, 1, 2, 3, 4]) + + synf_t1_y_hat = synf_single_task_t1.predict(x1_test, task_id=0) + synf_t2_y_hat = synf_single_task_t2.predict(x2_test, task_id=1) + synf_multi_y_hat_t1 = synf_multi_task.predict(x1_test, task_id=0) + synf_multi_y_hat_t2 = synf_multi_task.predict(x2_test, task_id=1) + sdf_t1_y_hat = sdf_single_task_t1.predict(x1_test) + sdf_t2_y_hat = sdf_single_task_t2.predict(x2_test) + sdf_multi_y_hat_t1 = sdf_multi_task.predict(x1_test) + sdf_multi_y_hat_t2 = sdf_multi_task.predict(x2_test) + + errors[0, idx] = 1 - np.mean(synf_t1_y_hat == y1_test) # synf single task, t1 + errors[1, idx] = 1 - np.mean(synf_multi_y_hat_t1 == y1_test) # synf multi task t1 + errors[2, idx] = 1 - np.mean(synf_t2_y_hat == y2_test) # synf single task, t2 + errors[3, idx] = 1 - np.mean(sdf_t1_y_hat == y1_test) # sdf single task, t1 + errors[4, idx] = 1 - np.mean(sdf_multi_y_hat_t1 == y1_test) # sdf multi task, t1 + errors[5, idx] = 1 - np.mean(sdf_t2_y_hat == y2_test) # sdf single task, t2 + errors[6, idx] = 1 - np.mean(synf_multi_y_hat_t2 == y2_test) # synf multi task, t2 + errors[7, idx] = 1 - np.mean(sdf_multi_y_hat_t2 == y2_test) # sdf multi task, t2 + for i in range(n_task2 // n_update - 1): + x, y = generate_spirals(n_update, 5, noise=0.4) + synf_multi_task.update_task(x, y, task_id=1) + synf_single_task_t2.update_task(x, y, task_id=1) + sdf_single_task_t2.partial_fit(x, y) + sdf_multi_task.partial_fit(x, y) + synf_t1_y_hat = synf_single_task_t1.predict(x1_test, task_id=0) + 
synf_t2_y_hat = synf_single_task_t2.predict(x2_test, task_id=1) + synf_multi_y_hat_t1 = synf_multi_task.predict(x1_test, task_id=0) + synf_multi_y_hat_t2 = synf_multi_task.predict(x2_test, task_id=1) + sdf_t1_y_hat = sdf_single_task_t1.predict(x1_test) + sdf_t2_y_hat = sdf_single_task_t2.predict(x2_test) + sdf_multi_y_hat_t1 = sdf_multi_task.predict(x1_test) + sdf_multi_y_hat_t2 = sdf_multi_task.predict(x2_test) + + errors[0, i + idx + 1] = 1 - np.mean( + synf_t1_y_hat == y1_test + ) # synf single task, t1 + errors[1, i + idx + 1] = 1 - np.mean( + synf_multi_y_hat_t1 == y1_test + ) # synf multi task t1 + errors[2, i + idx + 1] = 1 - np.mean( + synf_t2_y_hat == y2_test + ) # synf single task, t2 + errors[3, i + idx + 1] = 1 - np.mean( + sdf_t1_y_hat == y1_test + ) # sdf single task, t1 + errors[4, i + idx + 1] = 1 - np.mean( + sdf_multi_y_hat_t1 == y1_test + ) # sdf multi task, t1 + errors[5, i + idx + 1] = 1 - np.mean( + sdf_t2_y_hat == y2_test + ) # sdf single task, t2 + errors[6, i + idx + 1] = 1 - np.mean( + synf_multi_y_hat_t2 == y2_test + ) # synf multi task, t2 + errors[7, i + idx + 1] = 1 - np.mean( + sdf_multi_y_hat_t2 == y2_test + ) # sdf multi task, t2 + + return errors + + +def experiment_batch( + n_task1=750, + n_task2=750, + n_test=1000, + n_trees=10, + max_depth=None, + random_state=None, +): + + if n_task1 == 0 and n_task2 == 0: + raise ValueError("Wake up and provide samples to train!!!") + + if random_state != None: + np.random.seed(random_state) + + errors = np.zeros(6, dtype=float) + + progressive_learner = LifelongClassificationForest(default_n_estimators=n_trees) + uf1 = LifelongClassificationForest(default_n_estimators=n_trees) + naive_uf = LifelongClassificationForest(default_n_estimators=n_trees) + uf2 = LifelongClassificationForest(default_n_estimators=n_trees) + + # source data + X_task1, y_task1 = generate_spirals(n_task1, 3, noise=0.8) + test_task1, test_label_task1 = generate_spirals(n_test, 3, noise=0.8) + + # target data + X_task2, 
y_task2 = generate_spirals(n_task2, 5, noise=0.4) + test_task2, test_label_task2 = generate_spirals(n_test, 5, noise=0.4) + + if n_task1 == 0: + progressive_learner.add_task(X_task2, y_task2, n_estimators=n_trees) + uf2.add_task(X_task2, y_task2, n_estimators=n_trees) + + errors[0] = 0.5 + errors[1] = 0.5 + + uf_task2 = uf2.predict(test_task2, task_id=0) + l2f_task2 = progressive_learner.predict(test_task2, task_id=0) + + errors[2] = 1 - np.mean(uf_task2 == test_label_task2) + errors[3] = 1 - np.mean(l2f_task2 == test_label_task2) + + errors[4] = 0.5 + errors[5] = 1 - np.mean(uf_task2 == test_label_task2) + elif n_task2 == 0: + progressive_learner.add_task(X_task1, y_task1, n_estimators=n_trees) + uf1.add_task(X_task1, y_task1, n_estimators=n_trees) + + uf_task1 = uf1.predict(test_task1, task_id=0) + l2f_task1 = progressive_learner.predict(test_task1, task_id=0) + + errors[0] = 1 - np.mean(uf_task1 == test_label_task1) + errors[1] = 1 - np.mean(l2f_task1 == test_label_task1) + + errors[2] = 0.5 + errors[3] = 0.5 + + errors[4] = 1 - np.mean(uf_task1 == test_label_task1) + errors[5] = 0.5 + else: + progressive_learner.add_task(X_task1, y_task1, n_estimators=n_trees) + progressive_learner.add_task(X_task2, y_task2, n_estimators=n_trees) + + uf1.add_task(X_task1, y_task1, n_estimators=2 * n_trees) + uf2.add_task(X_task2, y_task2, n_estimators=2 * n_trees) + + naive_uf_train_x = np.concatenate((X_task1, X_task2), axis=0) + naive_uf_train_y = np.concatenate((y_task1, y_task2), axis=0) + naive_uf.add_task(naive_uf_train_x, naive_uf_train_y, n_estimators=n_trees) + + uf_task1 = uf1.predict(test_task1, task_id=0) + l2f_task1 = progressive_learner.predict(test_task1, task_id=0) + uf_task2 = uf2.predict(test_task2, task_id=0) + l2f_task2 = progressive_learner.predict(test_task2, task_id=1) + naive_uf_task1 = naive_uf.predict(test_task1, task_id=0) + naive_uf_task2 = naive_uf.predict(test_task2, task_id=0) + + errors[0] = 1 - np.mean(uf_task1 == test_label_task1) + errors[1] = 
1 - np.mean(l2f_task1 == test_label_task1) + errors[2] = 1 - np.mean(uf_task2 == test_label_task2) + errors[3] = 1 - np.mean(l2f_task2 == test_label_task2) + errors[4] = 1 - np.mean(naive_uf_task1 == test_label_task1) + errors[5] = 1 - np.mean(naive_uf_task2 == test_label_task2) + + return errors + + +def run_batch_experiment(mc_rep): + n_test = 1000 + n_trees = 10 + n_xor = (100 * np.arange(0.25, 7.50, step=0.25)).astype(int) + n_xnor = (100 * np.arange(0.25, 7.75, step=0.25)).astype(int) + mean_error = np.zeros((6, len(n_xor) + len(n_xnor))) + std_error = np.zeros((6, len(n_xor) + len(n_xnor))) + mean_te = np.zeros((4, len(n_xor) + len(n_xnor))) + std_te = np.zeros((4, len(n_xor) + len(n_xnor))) + for i, n1 in enumerate(n_xor): + # run experiment in parallel + error = np.array( + Parallel(n_jobs=1, verbose=0)( + delayed(experiment_batch)(n1, 0, max_depth=ceil(log2(n1))) + for _ in range(mc_rep) + ) + ) + # extract relevant data and store in arrays + mean_error[:, i] = np.mean(error, axis=0) + std_error[:, i] = np.std(error, ddof=1, axis=0) + mean_te[0, i] = np.mean(error[:, 0]) / np.mean(error[:, 1]) + mean_te[1, i] = np.mean(error[:, 2]) / np.mean(error[:, 3]) + mean_te[2, i] = np.mean(error[:, 0]) / np.mean(error[:, 4]) + mean_te[3, i] = np.mean(error[:, 2]) / np.mean(error[:, 5]) + + # initialize learning on n-xor data + if n1 == n_xor[-1]: + for j, n2 in enumerate(n_xnor): + # run experiment in parallel + error = np.array( + Parallel(n_jobs=1, verbose=0)( + delayed(experiment_batch)(n1, n2, max_depth=ceil(log2(750))) + for _ in range(mc_rep) + ) + ) + # extract relevant data and store in arrays + mean_error[:, i + j + 1] = np.mean(error, axis=0) + std_error[:, i + j + 1] = np.std(error, ddof=1, axis=0) + mean_te[0, i + j + 1] = np.mean(error[:, 0]) / np.mean(error[:, 1]) + mean_te[1, i + j + 1] = np.mean(error[:, 2]) / np.mean(error[:, 3]) + mean_te[2, i + j + 1] = np.mean(error[:, 0]) / np.mean(error[:, 4]) + mean_te[3, i + j + 1] = np.mean(error[:, 2]) / 
np.mean(error[:, 5]) + + return mean_error, mean_te + + +def plot_error(results): + """Plot Generalization Errors for experiment type (RXOR or XNOR)""" + algorithms = [ + "Stream Synergistic Forest", + "Stream Decision Forest", + "Batch Synergistic Forest", + "Batch Decision Forest", + ] + fontsize = 30 + labelsize = 28 + ls = ["-", "--"] + colors = sns.color_palette("bright") + fig = plt.figure(figsize=(26, 14)) + gs = fig.add_gridspec(14, 26) + ax1 = fig.add_subplot(gs[7:, :5]) + # Stream Synergistic Forest XOR + ax1.plot( + (100 * np.arange(0.25, 15, step=0.25)).astype(int), + results[0][0], + label=algorithms[0], + c=colors[3], + ls=ls[1], + lw=3, + ) + # Stream Decision Forest XOR + ax1.plot( + (100 * np.arange(0.25, 15, step=0.25)).astype(int), + results[0][2], + label=algorithms[1], + c=colors[2], + ls=ls[1], + lw=3, + ) + # Batch Synergistic Forest XOR + ax1.plot( + (100 * np.arange(0.25, 15, step=0.25)).astype(int), + results[2][1], + label=algorithms[2], + c=colors[3], + ls=ls[0], + lw=3, + ) + # Batch Decision Forest XOR + ax1.plot( + (100 * np.arange(0.25, 15, step=0.25)).astype(int), + results[2][4], + label=algorithms[3], + c=colors[2], + ls=ls[0], + lw=3, + ) + + ax1.set_ylabel("Generalization Error (3 spirals)", fontsize=fontsize) + ax1.set_xlabel("Total Sample Size", fontsize=fontsize) + ax1.tick_params(labelsize=labelsize) + # ax1.set_yscale("log") + ax1.yaxis.set_major_formatter(ScalarFormatter()) + ax1.set_yticks([0.25, 0.45, 0.65, 0.85]) + ax1.set_xticks([0, 750, 1500]) + ax1.axvline(x=750, c="gray", linewidth=1.5, linestyle="dashed") + ax1.axvline(x=1500, c="gray", linewidth=1.5, linestyle="dashed") + + right_side = ax1.spines["right"] + right_side.set_visible(False) + top_side = ax1.spines["top"] + top_side.set_visible(False) + + ax1.text(75, np.mean(ax1.get_ylim()) + 0.35, "3 spirals", fontsize=26) + ax1.text(850, np.mean(ax1.get_ylim()) + 0.35, "5 spirals", fontsize=26) + + ######## RXOR + ax1 = fig.add_subplot(gs[7:, 7:12]) + rxor_range = 
(100 * np.arange(0.25, 15, step=0.25)).astype(int)[30:] + # Stream Synergistic Forest RXOR + ax1.plot( + rxor_range, + results[0][1][30:], + label=algorithms[0], + c=colors[3], + ls=ls[1], + lw=3, + ) + # Stream Decision Forest RXOR + ax1.plot( + rxor_range, + results[0][3][30:], + label=algorithms[1], + c=colors[2], + ls=ls[1], + lw=3, + ) + # Batch Synergistic Forest RXOR + ax1.plot( + rxor_range, + results[2][3][30:], + label=algorithms[2], + c=colors[3], + ls=ls[0], + lw=3, + ) + # Batch Decision Forest RXOR + ax1.plot( + rxor_range, + results[2][5][30:], + label=algorithms[3], + c=colors[2], + ls=ls[0], + lw=3, + ) + ax1.set_ylabel("Generalization Error (5 spirals)", fontsize=fontsize) + ax1.legend( + bbox_to_anchor=(1, -0.25), + loc="upper center", + fontsize=20, + ncol=4, + frameon=False, + ) + ax1.set_xlabel("Total Sample Size", fontsize=fontsize) + ax1.tick_params(labelsize=labelsize) + # ax1.set_yscale("log") + ax1.yaxis.set_major_formatter(ScalarFormatter()) + ax1.set_yticks([0.25, 0.45, 0.65, 0.85]) + ax1.set_xticks([0, 750, 1500]) + ax1.axvline(x=750, c="gray", linewidth=1.5, linestyle="dashed") + ax1.axvline(x=1500, c="gray", linewidth=1.5, linestyle="dashed") + right_side = ax1.spines["right"] + right_side.set_visible(False) + top_side = ax1.spines["top"] + top_side.set_visible(False) + + ax1.text(75, np.mean(ax1.get_ylim()) + 0.4, "3 spirals", fontsize=26) + ax1.text(850, np.mean(ax1.get_ylim()) + 0.4, "5 spirals", fontsize=26) + + #### Transfer Efficiency + ax1 = fig.add_subplot(gs[7:, 14:19]) + algorithms = [ + "Stream Synergistic Forest TE", + "Stream Decision Forest TE", + "Batch Synergistic Forest TE", + "Batch Decision Forest TE", + ] + rxor_range = (100 * np.arange(0.25, 15, step=0.25)).astype(int)[30:] + # Stream Synergistic Forest RXOR + ax1.plot( + rxor_range, + np.log(results[1][0][30:]), + label=algorithms[0], + c=colors[3], + ls=ls[1], + lw=3, + ) + # Stream Decision Forest RXOR + ax1.plot( + rxor_range, + np.log(results[1][1][30:]), + 
label=algorithms[1], + c=colors[2], + ls=ls[1], + lw=3, + ) + # Batch Synergistic Forest RXOR + ax1.plot( + rxor_range, + np.log(results[3][1][30:]), + label=algorithms[2], + c=colors[3], + ls=ls[0], + lw=3, + ) + # Batch Decision Forest RXOR + ax1.plot( + rxor_range, + np.log(results[3][3][30:]), + label=algorithms[3], + c=colors[2], + ls=ls[0], + lw=3, + ) + + ax1.set_ylabel("Log Forward Learning Efficiency", fontsize=fontsize) + ax1.set_xlabel("Total Sample Size", fontsize=fontsize) + ax1.tick_params(labelsize=labelsize) + # ax1.set_yscale("log") + ax1.yaxis.set_major_formatter(ScalarFormatter()) + ax1.set_yticks([-1, 0, 1]) + ax1.set_xlim(-1, 1) + ax1.set_xticks([0, 750, 1500]) + ax1.axvline(x=750, c="gray", linewidth=1.5, linestyle="dashed") + ax1.axvline(x=1500, c="gray", linewidth=1.5, linestyle="dashed") + right_side = ax1.spines["right"] + right_side.set_visible(False) + top_side = ax1.spines["top"] + top_side.set_visible(False) + ax1.axhline(y=0, c="gray", linewidth=1.5, linestyle="dashed") + + ax1.text(75, np.mean(ax1.get_ylim()) + 1.1, "3 Spirals", fontsize=26) + ax1.text(850, np.mean(ax1.get_ylim()) + 1.1, "5 spirals ", fontsize=26) + + #### BACKWARDS Transfer Efficiency + ax1 = fig.add_subplot(gs[7:, 21:]) + algorithms = [ + "Stream Synergistic Forest TE", + "Stream Decision Forest TE", + "Batch Synergistic Forest TE", + "Batch Decision Forest TE", + ] + rxor_range = (100 * np.arange(0.25, 15, step=0.25)).astype(int) + # Stream Synergistic Forest RXOR + ax1.plot( + rxor_range, + np.log(results[1][2]), + label=algorithms[0], + c=colors[3], + ls=ls[1], + lw=3, + ) + # Stream Decision Forest RXOR + ax1.plot( + rxor_range, + np.log(results[1][3]), + label=algorithms[1], + c=colors[2], + ls=ls[1], + lw=3, + ) + # Batch Synergistic Forest RXOR + ax1.plot( + rxor_range, + np.log(results[3][0]), + label=algorithms[2], + c=colors[3], + ls=ls[0], + lw=3, + ) + # Batch Decision Forest RXOR + ax1.plot( + rxor_range, + np.log(results[3][2]), + label=algorithms[3], 
+ c=colors[2], + ls=ls[0], + lw=3, + ) + + ax1.set_ylabel("Log Backward Learning Efficiency", fontsize=fontsize) + ax1.set_xlabel("Total Sample Size", fontsize=fontsize) + ax1.tick_params(labelsize=labelsize) + # ax1.set_yscale("log") + ax1.yaxis.set_major_formatter(ScalarFormatter()) + ax1.set_yticks([-1, 0, 1]) + ax1.set_xlim(-1, 1) + ax1.set_xticks([25, 750, 1500]) + ax1.axvline(x=750, c="gray", linewidth=1.5, linestyle="dashed") + ax1.axvline(x=1500, c="gray", linewidth=1.5, linestyle="dashed") + right_side = ax1.spines["right"] + right_side.set_visible(False) + top_side = ax1.spines["top"] + top_side.set_visible(False) + ax1.axhline(y=0, c="gray", linewidth=1.5, linestyle="dashed") + + ax1.text(75, np.mean(ax1.get_ylim()) + 1.1, "3 Spirals", fontsize=26) + ax1.text(850, np.mean(ax1.get_ylim()) + 1.1, "5 spirals", fontsize=26) diff --git a/docs/experiments/functions/streaming_xor_functions.py b/docs/experiments/functions/streaming_xor_functions.py new file mode 100644 index 0000000000..de59d62add --- /dev/null +++ b/docs/experiments/functions/streaming_xor_functions.py @@ -0,0 +1,632 @@ +from proglearn.forest import LifelongClassificationForest +from sdtf import StreamDecisionForest +import matplotlib.pyplot as plt +from matplotlib.ticker import ScalarFormatter +import numpy as np +import seaborn as sns +from joblib import Parallel, delayed +from proglearn.sims import generate_gaussian_parity +from math import ceil, log2 + + +def run_gaussian_experiments(mc_rep): + """ + A function to run both Gaussian R-XOR and XNOR experiments in streaming and batch settings + """ + # generate all results + stream_rxor_errors = run_experiment_stream(mc_rep, task2_angle=np.pi / 4) + stream_xnor_errors = run_experiment_stream(mc_rep, task2_angle=np.pi / 2) + batch_xnor_error, batch_xnor_le = run_batch_experiment(mc_rep, t2_angle=np.pi / 2) + batch_rxor_error, batch_rxor_le = run_batch_experiment(mc_rep, t2_angle=np.pi / 4) + # format for plotting + streaming_rxor_errors, 
streaming_rxor_efficiencies = get_learning_efficiencies( + stream_rxor_errors + ) + streaming_xnor_errors, streaming_xnor_efficiencies = get_learning_efficiencies( + stream_xnor_errors + ) + rxor_results = [ + streaming_rxor_errors, + streaming_rxor_efficiencies, + batch_rxor_error, + batch_rxor_le, + ] + xnor_results = [ + streaming_xnor_errors, + streaming_xnor_efficiencies, + batch_xnor_error, + batch_xnor_le, + ] + return rxor_results, xnor_results + + +def run_experiment_stream(mc_rep, task2_angle): + mean_errors = experiment_stream(task2_angle=task2_angle) + for i in range(mc_rep - 1): + mean_errors += experiment_stream(task2_angle=task2_angle) + mean_errors = mean_errors / mc_rep + return mean_errors + + +def get_learning_efficiencies(stream_errors): + """ + Returns + ---------- + errors: SynF task 1, Synf task 2, SDF task 1, SDF task 2 + learning_efficiencies: SynF FLE, SDF FLE, SynF BLE, SDF BLE + """ + stream_synf_FLE = stream_errors[2, :] / stream_errors[6, :] + sdf_FLE = stream_errors[5, :] / stream_errors[7, :] + stream_synf_BLE = stream_errors[0, :] / stream_errors[1, :] + sdf_BLE = stream_errors[3, :] / stream_errors[4, :] + errors = [stream_errors[1], stream_errors[6], stream_errors[4], stream_errors[7]] + learning_efficiencies = [stream_synf_FLE, sdf_FLE, stream_synf_BLE, sdf_BLE] + return errors, learning_efficiencies + + +def experiment_stream( + n_task1=750, + n_task2=750, + n_update=25, + n_test=1000, + task2_angle=np.pi / 2, + n_trees=10, +): + """ + A function to do stream SynF and stream decision forest experiment + between two tasks where the data is generated using Gaussian parity. 
+ """ + errors = np.zeros((8, ((n_task1 + n_task2) // n_update) - 1), dtype=float) + + # instantiate classifiers + synf_single_task_t1 = LifelongClassificationForest(default_n_estimators=n_trees) + synf_multi_task = LifelongClassificationForest(default_n_estimators=n_trees) + synf_single_task_t2 = LifelongClassificationForest(default_n_estimators=n_trees) + sdf_single_task_t1 = StreamDecisionForest(n_estimators=n_trees) + sdf_multi_task = StreamDecisionForest(n_estimators=n_trees) + sdf_single_task_t2 = StreamDecisionForest(n_estimators=n_trees) + + # generate initial data for add_task + x1, y1 = generate_gaussian_parity(n_update) + x2, y2 = generate_gaussian_parity(n_update, angle_params=task2_angle) + x1_test, y1_test = generate_gaussian_parity(1000) + x2_test, y2_test = generate_gaussian_parity(1000, angle_params=task2_angle) + + # add tasks to progressive learners/decision forests + synf_single_task_t1.add_task(x1, y1, task_id=0, classes=[0, 1]) + synf_multi_task.add_task(x1, y1, task_id=0, classes=[0, 1]) + sdf_single_task_t1.partial_fit(x1, y1, classes=[0, 1]) + sdf_multi_task.partial_fit(x1, y1, classes=[0, 1]) + + # updating task 1 + for i in range(n_task1 // n_update - 1): + x, y = generate_gaussian_parity(n_update) + synf_single_task_t1.update_task(x, y, task_id=0) + synf_multi_task.update_task(x, y, task_id=0) + sdf_single_task_t1.partial_fit(x, y) + sdf_multi_task.partial_fit(x, y) + synf_t1_y_hat = synf_single_task_t1.predict(x1_test, task_id=0) + synf_multi_y_hat = synf_multi_task.predict(x1_test, task_id=0) + sdf_t1_y_hat = sdf_single_task_t1.predict(x1_test) + sdf_multi_y_hat = sdf_multi_task.predict(x1_test) + errors[0, i] = 1 - np.mean(synf_t1_y_hat == y1_test) # synf single task, t1 + errors[1, i] = 1 - np.mean(synf_multi_y_hat == y1_test) # synf multi task t1 + errors[2, i] = 0.5 # synf single task, t2 + errors[3, i] = 1 - np.mean(sdf_t1_y_hat == y1_test) # sdf single task, t1 + errors[4, i] = 1 - np.mean(sdf_multi_y_hat == y1_test) # sdf multi 
task, t1 + errors[5, i] = 0.5 # sdf single task, t2 + errors[6, i] = 0.5 # synf multi task, t2 + errors[7, i] = 0.5 # sdf multi task, t2 + + idx = (n_task1 // n_update) - 1 + # updating task 2 + synf_multi_task.add_task(x2, y2, task_id=1, classes=[0, 1]) + synf_single_task_t2.add_task(x2, y2, task_id=1, classes=[0, 1]) + sdf_single_task_t2.partial_fit(x2, y2, classes=[0, 1]) + sdf_multi_task.partial_fit(x2, y2, classes=[0, 1]) + + synf_t1_y_hat = synf_single_task_t1.predict(x1_test, task_id=0) + synf_t2_y_hat = synf_single_task_t2.predict(x2_test, task_id=1) + synf_multi_y_hat_t1 = synf_multi_task.predict(x1_test, task_id=0) + synf_multi_y_hat_t2 = synf_multi_task.predict(x2_test, task_id=1) + sdf_t1_y_hat = sdf_single_task_t1.predict(x1_test) + sdf_t2_y_hat = sdf_single_task_t2.predict(x2_test) + sdf_multi_y_hat_t1 = sdf_multi_task.predict(x1_test) + sdf_multi_y_hat_t2 = sdf_multi_task.predict(x2_test) + + errors[0, idx] = 1 - np.mean(synf_t1_y_hat == y1_test) # synf single task, t1 + errors[1, idx] = 1 - np.mean(synf_multi_y_hat_t1 == y1_test) # synf multi task t1 + errors[2, idx] = 1 - np.mean(synf_t2_y_hat == y2_test) # synf single task, t2 + errors[3, idx] = 1 - np.mean(sdf_t1_y_hat == y1_test) # sdf single task, t1 + errors[4, idx] = 1 - np.mean(sdf_multi_y_hat_t1 == y1_test) # sdf multi task, t1 + errors[5, idx] = 1 - np.mean(sdf_t2_y_hat == y2_test) # sdf single task, t2 + errors[6, idx] = 1 - np.mean(synf_multi_y_hat_t2 == y2_test) # synf multi task, t2 + errors[7, idx] = 1 - np.mean(sdf_multi_y_hat_t2 == y2_test) # sdf multi task, t2 + for i in range(n_task2 // n_update - 1): + x, y = generate_gaussian_parity(n_update, angle_params=task2_angle) + synf_multi_task.update_task(x, y, task_id=1) + synf_single_task_t2.update_task(x, y, task_id=1) + sdf_single_task_t2.partial_fit(x, y) + sdf_multi_task.partial_fit(x, y) + synf_t1_y_hat = synf_single_task_t1.predict(x1_test, task_id=0) + synf_t2_y_hat = synf_single_task_t2.predict(x2_test, task_id=1) + 
synf_multi_y_hat_t1 = synf_multi_task.predict(x1_test, task_id=0) + synf_multi_y_hat_t2 = synf_multi_task.predict(x2_test, task_id=1) + sdf_t1_y_hat = sdf_single_task_t1.predict(x1_test) + sdf_t2_y_hat = sdf_single_task_t2.predict(x2_test) + sdf_multi_y_hat_t1 = sdf_multi_task.predict(x1_test) + sdf_multi_y_hat_t2 = sdf_multi_task.predict(x2_test) + + errors[0, i + idx + 1] = 1 - np.mean( + synf_t1_y_hat == y1_test + ) # synf single task, t1 + errors[1, i + idx + 1] = 1 - np.mean( + synf_multi_y_hat_t1 == y1_test + ) # synf multi task t1 + errors[2, i + idx + 1] = 1 - np.mean( + synf_t2_y_hat == y2_test + ) # synf single task, t2 + errors[3, i + idx + 1] = 1 - np.mean( + sdf_t1_y_hat == y1_test + ) # sdf single task, t1 + errors[4, i + idx + 1] = 1 - np.mean( + sdf_multi_y_hat_t1 == y1_test + ) # sdf multi task, t1 + errors[5, i + idx + 1] = 1 - np.mean( + sdf_t2_y_hat == y2_test + ) # sdf single task, t2 + errors[6, i + idx + 1] = 1 - np.mean( + synf_multi_y_hat_t2 == y2_test + ) # synf multi task, t2 + errors[7, i + idx + 1] = 1 - np.mean( + sdf_multi_y_hat_t2 == y2_test + ) # sdf multi task, t2 + + return errors + + +def experiment_batch( + n_task1=750, + n_task2=750, + n_test=1000, + task1_angle=0, + task2_angle=np.pi / 2, + n_trees=10, + max_depth=None, + random_state=None, +): + + """ + A function to do SynF experiment between two tasks + where the task data is generated using Gaussian parity. + Parameters + ---------- + n_task1 : int + Total number of train sample for task 1. + n_task2 : int + Total number of train dsample for task 2 + n_test : int, optional (default=1000) + Number of test sample for each task. + task1_angle : float, optional (default=0) + Angle in radian for task 1. + task2_angle : float, optional (default=numpy.pi/2) + Angle in radian for task 2. + n_trees : int, optional (default=10) + Number of total trees to train for each task. + max_depth : int, optional (default=None) + Maximum allowable depth for each tree. 
+ random_state : int, RandomState instance, default=None + Determines random number generation for dataset creation. Pass an int + for reproducible output across multiple function calls. + Returns + ------- + errors : array of shape [6] + Elements of the array is organized as single task error task1, + multitask error task1, single task error task2, + multitask error task2, naive UF error task1, + naive UF task2. + """ + + if n_task1 == 0 and n_task2 == 0: + raise ValueError("Wake up and provide samples to train!!!") + + if random_state != None: + np.random.seed(random_state) + + errors = np.zeros(6, dtype=float) + + progressive_learner = LifelongClassificationForest(default_n_estimators=n_trees) + uf1 = LifelongClassificationForest(default_n_estimators=n_trees) + naive_uf = LifelongClassificationForest(default_n_estimators=n_trees) + uf2 = LifelongClassificationForest(default_n_estimators=n_trees) + + # source data + X_task1, y_task1 = generate_gaussian_parity(n_task1, angle_params=task1_angle) + test_task1, test_label_task1 = generate_gaussian_parity( + n_test, angle_params=task1_angle + ) + + # target data + X_task2, y_task2 = generate_gaussian_parity(n_task2, angle_params=task2_angle) + test_task2, test_label_task2 = generate_gaussian_parity( + n_test, angle_params=task2_angle + ) + + if n_task1 == 0: + progressive_learner.add_task(X_task2, y_task2, n_estimators=n_trees) + uf2.add_task(X_task2, y_task2, n_estimators=n_trees) + + errors[0] = 0.5 + errors[1] = 0.5 + + uf_task2 = uf2.predict(test_task2, task_id=0) + l2f_task2 = progressive_learner.predict(test_task2, task_id=0) + + errors[2] = 1 - np.mean(uf_task2 == test_label_task2) + errors[3] = 1 - np.mean(l2f_task2 == test_label_task2) + + errors[4] = 0.5 + errors[5] = 1 - np.mean(uf_task2 == test_label_task2) + elif n_task2 == 0: + progressive_learner.add_task(X_task1, y_task1, n_estimators=n_trees) + uf1.add_task(X_task1, y_task1, n_estimators=n_trees) + + uf_task1 = uf1.predict(test_task1, task_id=0) + 
l2f_task1 = progressive_learner.predict(test_task1, task_id=0) + + errors[0] = 1 - np.mean(uf_task1 == test_label_task1) + errors[1] = 1 - np.mean(l2f_task1 == test_label_task1) + + errors[2] = 0.5 + errors[3] = 0.5 + + errors[4] = 1 - np.mean(uf_task1 == test_label_task1) + errors[5] = 0.5 + else: + progressive_learner.add_task(X_task1, y_task1, n_estimators=n_trees) + progressive_learner.add_task(X_task2, y_task2, n_estimators=n_trees) + + uf1.add_task(X_task1, y_task1, n_estimators=2 * n_trees) + uf2.add_task(X_task2, y_task2, n_estimators=2 * n_trees) + + naive_uf_train_x = np.concatenate((X_task1, X_task2), axis=0) + naive_uf_train_y = np.concatenate((y_task1, y_task2), axis=0) + naive_uf.add_task(naive_uf_train_x, naive_uf_train_y, n_estimators=n_trees) + + uf_task1 = uf1.predict(test_task1, task_id=0) + l2f_task1 = progressive_learner.predict(test_task1, task_id=0) + uf_task2 = uf2.predict(test_task2, task_id=0) + l2f_task2 = progressive_learner.predict(test_task2, task_id=1) + naive_uf_task1 = naive_uf.predict(test_task1, task_id=0) + naive_uf_task2 = naive_uf.predict(test_task2, task_id=0) + + errors[0] = 1 - np.mean(uf_task1 == test_label_task1) + errors[1] = 1 - np.mean(l2f_task1 == test_label_task1) + errors[2] = 1 - np.mean(uf_task2 == test_label_task2) + errors[3] = 1 - np.mean(l2f_task2 == test_label_task2) + errors[4] = 1 - np.mean(naive_uf_task1 == test_label_task1) + errors[5] = 1 - np.mean(naive_uf_task2 == test_label_task2) + + return errors + + +def run_batch_experiment(mc_rep, t2_angle): + n_test = 1000 + n_trees = 10 + n_xor = (100 * np.arange(0.25, 7.50, step=0.25)).astype(int) + n_xnor = (100 * np.arange(0.25, 7.75, step=0.25)).astype(int) + mean_error = np.zeros((6, len(n_xor) + len(n_xnor))) + std_error = np.zeros((6, len(n_xor) + len(n_xnor))) + mean_te = np.zeros((4, len(n_xor) + len(n_xnor))) + std_te = np.zeros((4, len(n_xor) + len(n_xnor))) + for i, n1 in enumerate(n_xor): + # run experiment in parallel + error = np.array( + 
Parallel(n_jobs=1, verbose=0)( + delayed(experiment_batch)( + n1, 0, task2_angle=t2_angle, max_depth=ceil(log2(n1)) + ) + for _ in range(mc_rep) + ) + ) + # extract relevant data and store in arrays + mean_error[:, i] = np.mean(error, axis=0) + std_error[:, i] = np.std(error, ddof=1, axis=0) + mean_te[0, i] = np.mean(error[:, 0]) / np.mean(error[:, 1]) + mean_te[1, i] = np.mean(error[:, 2]) / np.mean(error[:, 3]) + mean_te[2, i] = np.mean(error[:, 0]) / np.mean(error[:, 4]) + mean_te[3, i] = np.mean(error[:, 2]) / np.mean(error[:, 5]) + + # initialize learning on n-xor data + if n1 == n_xor[-1]: + for j, n2 in enumerate(n_xnor): + # run experiment in parallel + error = np.array( + Parallel(n_jobs=1, verbose=0)( + delayed(experiment_batch)( + n1, n2, task2_angle=t2_angle, max_depth=ceil(log2(750)) + ) + for _ in range(mc_rep) + ) + ) + # extract relevant data and store in arrays + mean_error[:, i + j + 1] = np.mean(error, axis=0) + std_error[:, i + j + 1] = np.std(error, ddof=1, axis=0) + mean_te[0, i + j + 1] = np.mean(error[:, 0]) / np.mean(error[:, 1]) + mean_te[1, i + j + 1] = np.mean(error[:, 2]) / np.mean(error[:, 3]) + mean_te[2, i + j + 1] = np.mean(error[:, 0]) / np.mean(error[:, 4]) + mean_te[3, i + j + 1] = np.mean(error[:, 2]) / np.mean(error[:, 5]) + + return mean_error, mean_te + + +def plot_error(results, experiment): + """Plot Generalization Errors for experiment type (RXOR or XNOR)""" + algorithms = [ + "Stream Synergistic Forest", + "Stream Decision Forest", + "Batch Synergistic Forest", + "Batch Decision Forest", + ] + fontsize = 30 + labelsize = 28 + ls = ["-", "--"] + colors = sns.color_palette("bright") + fig = plt.figure(figsize=(26, 14)) + gs = fig.add_gridspec(14, 26) + ax1 = fig.add_subplot(gs[7:, :5]) + # Stream Synergistic Forest XOR + ax1.plot( + (100 * np.arange(0.25, 15, step=0.25)).astype(int), + results[0][0], + label=algorithms[0], + c=colors[3], + ls=ls[1], + lw=3, + ) + # Stream Decision Forest XOR + ax1.plot( + (100 * 
np.arange(0.25, 15, step=0.25)).astype(int), + results[0][2], + label=algorithms[1], + c=colors[2], + ls=ls[1], + lw=3, + ) + # Batch Synergistic Forest XOR + ax1.plot( + (100 * np.arange(0.25, 15, step=0.25)).astype(int), + results[2][1], + label=algorithms[2], + c=colors[3], + ls=ls[0], + lw=3, + ) + # Batch Decision Forest XOR + ax1.plot( + (100 * np.arange(0.25, 15, step=0.25)).astype(int), + results[2][4], + label=algorithms[3], + c=colors[2], + ls=ls[0], + lw=3, + ) + + ax1.set_ylabel("Generalization Error (XOR)", fontsize=fontsize) + ax1.set_xlabel("Total Sample Size", fontsize=fontsize) + ax1.tick_params(labelsize=labelsize) + ax1.set_yscale("log") + ax1.yaxis.set_major_formatter(ScalarFormatter()) + ax1.set_yticks([0.1, 0.3, 0.5, 0.9]) + ax1.set_xticks([0, 750, 1500]) + ax1.axvline(x=750, c="gray", linewidth=1.5, linestyle="dashed") + ax1.axvline(x=1500, c="gray", linewidth=1.5, linestyle="dashed") + + right_side = ax1.spines["right"] + right_side.set_visible(False) + top_side = ax1.spines["top"] + top_side.set_visible(False) + + ax1.text(200, np.mean(ax1.get_ylim()) + 0.5, "XOR", fontsize=26) + ax1.text(850, np.mean(ax1.get_ylim()) + 0.5, experiment, fontsize=26) + + ######## RXOR + ax1 = fig.add_subplot(gs[7:, 7:12]) + rxor_range = (100 * np.arange(0.25, 15, step=0.25)).astype(int)[30:] + # Stream Synergistic Forest RXOR + ax1.plot( + rxor_range, + results[0][1][30:], + label=algorithms[0], + c=colors[3], + ls=ls[1], + lw=3, + ) + # Stream Decision Forest RXOR + ax1.plot( + rxor_range, + results[0][3][30:], + label=algorithms[1], + c=colors[2], + ls=ls[1], + lw=3, + ) + # Batch Synergistic Forest RXOR + ax1.plot( + rxor_range, + results[2][3][30:], + label=algorithms[2], + c=colors[3], + ls=ls[0], + lw=3, + ) + # Batch Decision Forest RXOR + ax1.plot( + rxor_range, + results[2][5][30:], + label=algorithms[3], + c=colors[2], + ls=ls[0], + lw=3, + ) + ax1.set_ylabel("Generalization Error (%s)" % experiment, fontsize=fontsize) + ax1.legend( + 
bbox_to_anchor=(1, -0.25), + loc="upper center", + fontsize=20, + ncol=4, + frameon=False, + ) + ax1.set_xlabel("Total Sample Size", fontsize=fontsize) + ax1.tick_params(labelsize=labelsize) + ax1.set_yscale("log") + ax1.yaxis.set_major_formatter(ScalarFormatter()) + ax1.set_yticks([0.1, 0.3, 0.5, 0.9]) + ax1.set_xticks([0, 750, 1500]) + ax1.axvline(x=750, c="gray", linewidth=1.5, linestyle="dashed") + ax1.axvline(x=1500, c="gray", linewidth=1.5, linestyle="dashed") + right_side = ax1.spines["right"] + right_side.set_visible(False) + top_side = ax1.spines["top"] + top_side.set_visible(False) + + ax1.text(200, np.mean(ax1.get_ylim()) + 0.5, "XOR", fontsize=26) + ax1.text(850, np.mean(ax1.get_ylim()) + 0.5, experiment, fontsize=26) + + #### Transfer Efficiency + ax1 = fig.add_subplot(gs[7:, 14:19]) + algorithms = [ + "Stream Synergistic Forest TE", + "Stream Decision Forest TE", + "Batch Synergistic Forest TE", + "Batch Decision Forest TE", + ] + rxor_range = (100 * np.arange(0.25, 15, step=0.25)).astype(int)[30:] + # Stream Synergistic Forest RXOR + ax1.plot( + rxor_range, + np.log(results[1][0][30:]), + label=algorithms[0], + c=colors[3], + ls=ls[1], + lw=3, + ) + # Stream Decision Forest RXOR + ax1.plot( + rxor_range, + np.log(results[1][1][30:]), + label=algorithms[1], + c=colors[2], + ls=ls[1], + lw=3, + ) + # Batch Synergistic Forest RXOR + ax1.plot( + rxor_range, + np.log(results[3][1][30:]), + label=algorithms[2], + c=colors[3], + ls=ls[0], + lw=3, + ) + # Batch Decision Forest RXOR + ax1.plot( + rxor_range, + np.log(results[3][3][30:]), + label=algorithms[3], + c=colors[2], + ls=ls[0], + lw=3, + ) + + ax1.set_ylabel("Log Forward Learning Efficiency", fontsize=fontsize) + ax1.set_xlabel("Total Sample Size", fontsize=fontsize) + ax1.tick_params(labelsize=labelsize) + # ax1.set_yscale("log") + ax1.yaxis.set_major_formatter(ScalarFormatter()) + ax1.set_yticks([-1, 0, 1]) + ax1.set_xlim(-1, 1) + ax1.set_xticks([0, 750, 1500]) + ax1.axvline(x=750, c="gray", 
linewidth=1.5, linestyle="dashed") + ax1.axvline(x=1500, c="gray", linewidth=1.5, linestyle="dashed") + right_side = ax1.spines["right"] + right_side.set_visible(False) + top_side = ax1.spines["top"] + top_side.set_visible(False) + ax1.axhline(y=0, c="gray", linewidth=1.5, linestyle="dashed") + + if experiment == "XNOR": + ax1.text(200, np.mean(ax1.get_ylim()) + 2, "XOR", fontsize=26) + ax1.text(850, np.mean(ax1.get_ylim()) + 2, experiment, fontsize=26) + else: + ax1.text(200, np.mean(ax1.get_ylim()) + 1.25, "XOR", fontsize=26) + ax1.text(850, np.mean(ax1.get_ylim()) + 1.25, experiment, fontsize=26) + + #### BACKWARDS Transfer Efficiency + ax1 = fig.add_subplot(gs[7:, 21:]) + algorithms = [ + "Stream Synergistic Forest TE", + "Stream Decision Forest TE", + "Batch Synergistic Forest TE", + "Batch Decision Forest TE", + ] + rxor_range = (100 * np.arange(0.25, 15, step=0.25)).astype(int) + # Stream Synergistic Forest RXOR + ax1.plot( + rxor_range, + np.log(results[1][2]), + label=algorithms[0], + c=colors[3], + ls=ls[1], + lw=3, + ) + # Stream Decision Forest RXOR + ax1.plot( + rxor_range, + np.log(results[1][3]), + label=algorithms[1], + c=colors[2], + ls=ls[1], + lw=3, + ) + # Batch Synergistic Forest RXOR + ax1.plot( + rxor_range, + np.log(results[3][0]), + label=algorithms[2], + c=colors[3], + ls=ls[0], + lw=3, + ) + # Batch Decision Forest RXOR + ax1.plot( + rxor_range, + np.log(results[3][2]), + label=algorithms[3], + c=colors[2], + ls=ls[0], + lw=3, + ) + + ax1.set_ylabel("Log Backward Learning Efficiency", fontsize=fontsize) + ax1.set_xlabel("Total Sample Size", fontsize=fontsize) + ax1.tick_params(labelsize=labelsize) + # ax1.set_yscale("log") + ax1.yaxis.set_major_formatter(ScalarFormatter()) + ax1.set_yticks([-1, 0, 1]) + ax1.set_xlim(-1, 1) + ax1.set_xticks([25, 750, 1500]) + ax1.axvline(x=750, c="gray", linewidth=1.5, linestyle="dashed") + ax1.axvline(x=1500, c="gray", linewidth=1.5, linestyle="dashed") + right_side = ax1.spines["right"] + 
right_side.set_visible(False) + top_side = ax1.spines["top"] + top_side.set_visible(False) + ax1.axhline(y=0, c="gray", linewidth=1.5, linestyle="dashed") + + if experiment == "XNOR": + ax1.text(200, np.mean(ax1.get_ylim()) + 2.0, "XOR", fontsize=26) + ax1.text(850, np.mean(ax1.get_ylim()) + 2.0, experiment, fontsize=26) + else: + ax1.text(200, np.mean(ax1.get_ylim()) + 1.5, "XOR", fontsize=26) + ax1.text(850, np.mean(ax1.get_ylim()) + 1.5, experiment, fontsize=26) diff --git a/docs/experiments/streaming_forest_tutorial.ipynb b/docs/experiments/streaming_forest_tutorial.ipynb new file mode 100644 index 0000000000..8d9f51e3be --- /dev/null +++ b/docs/experiments/streaming_forest_tutorial.ipynb @@ -0,0 +1,234 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Streaming Synergistic Forest Tutorial\n", + "**Note:** This is an experimental feature and requires a modified fork of `scikit-learn` with added `partial_fit` functionality: [scikit-learn-stream fork](https://github.com/PSSF23/scikit-learn-stream). Additionally, this notebook uses external functions stored in `tutorials/functions/streaming_forest_functions.py`" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "from functions import streaming_xor_functions as fn\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Using the `update_task` function for streaming data\n", + "Current standard implementations of decision forests operate in batch mode. In many real world applications, we are not provided with all data at once and therefore need to incrementally update as data arrives. For incrementally updating decision trees we can use the [scikit-learn-stream fork](https://github.com/PSSF23/scikit-learn-stream) with an added `partial_fit` function for incremental learning. 
Furthermore, for synergistic learning we can use the function `update_task`. When new data, $x$, arrives with labels, $y$, we can update task $t$ as follows:\n", + "\n", + "`synf.update_task(x,y,task_id = t)`\n", + "\n", + "Note that when using `update_task`, the `classes` argument must be provided on the initial call to `add_task`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Synergistic Learning\n", + "\n", + "The goal of synergistic learning is to improve performance on the current task as well as past and future tasks. This can be accomplished through ensembling independent representations, as is done in both Synergistic Forest implementations. \n", + "\n", + "The metric of learning efficiency, as described in Vogelstein et al. 2020, can be used to quantify a classifier's learning abilities. \n", + "\n", + "The **learning efficiency** of an algorithm $f$ for a given task $t$ with sample size $n$ is defined as \n", + "$$\\text{LE}^t_n(f):=\\frac{\\mathbb{E}[R^t(f(\\mathbf{S}^t_n))]}{\\mathbb{E}[R^t(f(\\mathbf{S}_n))]}$$\n", + "If $\\text{LE}^t_n(f) > 1$, $f$ has learned task $t$ with data $\\mathbf{S}_n$\n", + "\n", + "The **forward learning efficiency** of an algorithm $f$ for a given task $t$ with sample size $n$ is defined as \n", + "$$\\text{FLE}^t_n(f):=\\frac{\\mathbb{E}[R^t(f(\\mathbf{S}^t_n))]}{\\mathbb{E}[R^t(f(\\mathbf{S}^{\\leq t}_n))]}$$\n", + "\n", + "If $\\text{FLE}^t_n(f) > 1$, $f$ has leveraged data from past tasks to improve performance on task $t$\n", + "\n", + "The **backward learning efficiency** of an algorithm $f$ for a given task $t$ with sample size $n$ is defined as \n", + "$$\\text{BLE}^t_n(f):=\\frac{\\mathbb{E}[R^t(f(\\mathbf{S}^{\\leq t}_n))]}{\\mathbb{E}[R^t(f(\\mathbf{S}_n))]}$$\n", + "If $\\text{BLE}^t_n(f) > 1$, $f$ has leveraged data from future tasks to improve performance on previous tasks\n", + "\n", + "An algorithm has **synergistically learned** if $\\log\\text{LE}^t_n(f) > 0$ for all $t 
\\in \\mathcal{T}$ \n", + "\n", + "Conversely, an algorithm has **catastrophically forgotten** if it has negatively learned, i.e. $\\log\\text{LE}^t_n(f) < 0$, for all tasks $t \\in \\mathcal{T}$." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Gaussian XOR Experiments" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "mc_rep = 25\n", + "rxor_results, xnor_results = fn.run_gaussian_experiments(mc_rep)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Gaussian R-XOR" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "fn.plot_error(rxor_results, \"R-XOR\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Gaussian XNOR" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "fn.plot_error(xnor_results, \"XNOR\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Spiral Experiments" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from proglearn.sims import generate_spirals\n", + "from functions import streaming_spirals_functions as spirals" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "spiral3, y_3 = generate_spirals(750, 3, noise=0.8)\n", + "spiral5, y_5 = generate_spirals(750, 5, noise=0.4)\n", + "spirals.plot_spirals(spiral3, y_3, 3, spiral5, y_5, 5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mc_rep = 10\n", + "spiral_results = spirals.run_spiral_experiments(mc_rep)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "spirals.plot_error(spiral_results)" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "021eb6924868a04ff0ec55ee3d6a2838e9123a6946b2ca5369814d359127e19d" + }, + "kernelspec": { + "display_name": "Python 3.8.12 ('add_task')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/proglearn/forest.py b/proglearn/forest.py index 1d642ffbca..0f26e800e2 100644 --- a/proglearn/forest.py +++ b/proglearn/forest.py @@ -15,26 +15,21 @@ class LifelongClassificationForest(ClassificationProgressiveLearner): """ A class used to represent a lifelong classification forest. - Parameters ---------- default_n_estimators : int, default=100 The number of trees used in the Lifelong Classification Forest used if 'n_estimators' is not fed to add_{task, transformer}. - default_tree_construction_proportion : int, default=0.67 The proportions of the input data set aside to train each decision tree. The remainder of the data is used to fill in voting posteriors. This is used if 'tree_construction_proportion' is not fed to add_task. - default_kappa : float, default=np.inf The coefficient for finite sample correction. This is used if 'kappa' is not fed to add_task. - default_max_depth : int, default=30 The maximum depth of a tree in the Lifelong Classification Forest. This is used if 'max_depth' is not fed to add_task. - Attributes ---------- task_id_to_X : dict @@ -42,98 +37,80 @@ class LifelongClassificationForest(ClassificationProgressiveLearner): and values of type ndarray corresponding to the input data matrix X. 
This dictionary thus maps input data matrix to the task where posteriors are to be estimated. - task_id_to_y : dict A dictionary with keys of type obj corresponding to task ids and values of type ndarray corresponding to output data matrix y. This dictionary thus maps output data matrix to the task where posteriors are to be estimated. - transformer_id_to_X : dict A dictionary with keys of type obj corresponding to transformer ids and values of type ndarray corresponding to the output data matrix X. This dictionary thus maps input data matrix to a particular transformer. - transformer_id_to_y : dict A dictionary with keys of type obj corresponding to transformer ids and values of type ndarray corresponding to the output data matrix y. This dictionary thus maps output data matrix to a particular transformer. - transformer_id_to_transformers : dict A dictionary with keys of type obj corresponding to transformer ids and values of type obj corresponding to a transformer. This dictionary thus maps transformer ids to the corresponding transformers. - task_id_to_trasnformer_id_to_voters : dict A nested dictionary with outer key of type obj, corresponding to task ids inner key of type obj, corresponding to transformer ids, and values of type obj, corresponding to a voter. This dictionary thus maps voters to a corresponding transformer assigned to a particular task. - task_id_to_decider : dict A dictionary with keys of type obj, corresponding to task ids, and values of type obj corresponding to a decider. This dictionary thus maps deciders to a particular task. - task_id_to_decider_class : dict A dictionary with keys of type obj corresponding to task ids and values of type obj corresponding to a decider class. This dictionary thus maps decider classes to a particular task id. - task_id_to_voter_class : dict A dictionary with keys of type obj corresponding to task ids and values of type obj corresponding to a voter class. 
This dictionary thus maps voter classes to a particular task id. - task_id_to_voter_kwargs : dict A dictionary with keys of type obj corresponding to task ids and values of type obj corresponding to a voter kwargs. This dictionary thus maps voter kwargs to a particular task id. - task_id_to_decider_kwargs : dict A dictionary with keys of type obj corresponding to task ids and values of type obj corresponding to a decider kwargs. This dictionary thus maps decider kwargs to a particular task id. - task_id_to_bag_id_to_voter_data_idx : dict A nested dictionary with outer keys of type obj corresponding to task ids inner keys of type obj corresponding to bag ids and values of type obj corresponding to voter data indices. This dictionary thus maps voter data indices to particular bags for particular tasks. - task_id_to_decider_idx : dict A dictionary with keys of type obj corresponding to task ids and values of type obj corresponding to decider indices. This dictionary thus maps decider indices to particular tasks. - default_transformer_class : TreeClassificationTransformer The class of transformer to which the forest defaults if None is provided in any of the functions which add or set transformers. - default_transformer_kwargs : dict A dictionary with keys of type string and values of type obj corresponding to the given string kwarg. This determines to which type of transformer the forest defaults if None is provided in any of the functions which add or set transformers. - default_voter_class : TreeClassificationVoter The class of voter to which the forest defaults if None is provided in any of the functions which add or set voters. - default_voter_kwargs : dict A dictionary with keys of type string and values of type obj corresponding to the given string kwarg. This determines to which type of voter the forest defaults if None is provided in any of the functions which add or set voters. 
- default_decider_class : SimpleArgmaxAverage The class of decider to which the forest defaults if None is provided in any of the functions which add or set deciders. - default_decider_kwargs : dict A dictionary with keys of type string and values of type obj corresponding to the given string kwarg. This determines to which type of decider the @@ -171,6 +148,7 @@ def add_task( tree_construction_proportion="default", kappa="default", max_depth="default", + classes=None, ): """ adds a task with id task_id, max tree depth max_depth, given input data matrix X @@ -178,34 +156,26 @@ def add_task( data for training and voting based on tree_construction_proportion and uses the value of kappa to determine whether the learner will have finite sample correction. - Parameters ---------- X : ndarray The input data matrix. - y : ndarray The output (response) data matrix. - task_id : obj, default=None The id corresponding to the task being added. - n_estimators : int or str, default='default' The number of trees used for the given task. - tree_construction_proportion : int or str, default='default' The proportions of the input data set aside to train each decision tree. The remainder of the data is used to fill in voting posteriors. The default is used if 'default' is provided. - kappa : float or str, default='default' The coefficient for finite sample correction. The default is used if 'default' is provided. - max_depth : int or str, default='default' The maximum depth of a tree in the Lifelong Classification Forest. The default is used if 'default' is provided. 
- Returns ------- self : LifelongClassificationForest @@ -237,6 +207,7 @@ def add_task( "kappa": kappa, }, decider_kwargs={"classes": np.unique(y)}, + classes=classes, ) def add_transformer( @@ -246,31 +217,26 @@ def add_transformer( transformer_id=None, n_estimators="default", max_depth="default", + classes=None, ): """ adds a transformer with id transformer_id and max tree depth max_depth, trained on given input data matrix, X, and output data matrix, y, to the Lifelong Classification Forest. Also trains the voters and deciders from new transformer to previous tasks, and will train voters and deciders from this transformer to all new tasks. - Parameters ---------- X : ndarray The input data matrix. - y : ndarray The output (response) data matrix. - transformer_id : obj, default=None The id corresponding to the transformer being added. - n_estimators : int or str, default='default' The number of trees used for the given task. - max_depth : int or str, default='default' The maximum depth of a tree in the Lifelong Classification Forest. The default is used if 'default' is provided. - Returns ------- self : LifelongClassificationForest @@ -288,20 +254,115 @@ def add_transformer( transformer_kwargs={"kwargs": {"max_depth": max_depth}}, transformer_id=transformer_id, num_transformers=n_estimators, + classes=classes, + ) + + def update_task( + self, + X, + y, + task_id=None, + n_estimators="default", + tree_construction_proportion="default", + kappa="default", + max_depth="default", + classes=None, + ): + """ + updates a task with id task_id, max tree depth max_depth, given input data matrix X + and output data matrix y, to the Lifelong Classification Forest. Also splits + data for training and voting based on tree_construction_proportion and uses the + value of kappa to determine whether the learner will have + finite sample correction. + Parameters + ---------- + X : ndarray + The input data matrix. + y : ndarray + The output (response) data matrix. 
+ task_id : obj, default=None + The id corresponding to the task being added. + n_estimators : int or str, default='default' + The number of trees used for the given task. + tree_construction_proportion : int or str, default='default' + The proportions of the input data set aside to train each decision + tree. The remainder of the data is used to fill in voting posteriors. + The default is used if 'default' is provided. + kappa : float or str, default='default' + The coefficient for finite sample correction. + The default is used if 'default' is provided. + max_depth : int or str, default='default' + The maximum depth of a tree in the Lifelong Classification Forest. + The default is used if 'default' is provided. + Returns + ------- + self : LifelongClassificationForest + The object itself. + """ + if n_estimators == "default": + n_estimators = self.default_n_estimators + if tree_construction_proportion == "default": + tree_construction_proportion = self.default_tree_construction_proportion + if kappa == "default": + kappa = self.default_kappa + if max_depth == "default": + max_depth = self.default_max_depth + + X, y = check_X_y(X, y) + + return super().update_task( + X, + y, + classes=classes, + task_id=task_id, + transformer_voter_decider_split=[ + tree_construction_proportion, + 1 - tree_construction_proportion, + 0, + ], + num_transformers=n_estimators, + transformer_kwargs={"kwargs": {"max_depth": max_depth}}, + voter_kwargs={ + "classes": np.unique(y), + "kappa": kappa, + }, + decider_kwargs={"classes": np.unique(y)}, + ) + + def update_transformer( + self, + X, + y, + classes=None, + transformer_id=None, + n_estimators="default", + max_depth="default", + ): + + if n_estimators == "default": + n_estimators = self.default_n_estimators + if max_depth == "default": + max_depth = self.default_max_depth + + X, y = check_X_y(X, y) + return super().update_transformer( + X, + y, + classes=classes, + transformer_kwargs={"kwargs": {"max_depth": max_depth}}, + 
transformer_id=transformer_id, + num_transformers=n_estimators, ) def predict_proba(self, X, task_id): """ estimates class posteriors under task_id for each example in input data X. - Parameters ---------- X : ndarray The input data matrix. - task_id: The id corresponding to the task being mapped to. - Returns ------- y_proba_hat : ndarray of shape [n_samples, n_classes] @@ -312,15 +373,12 @@ def predict_proba(self, X, task_id): def predict(self, X, task_id): """ predicts class labels under task_id for each example in input data X. - Parameters ---------- X : ndarray The input data matrix. - task_id : obj The id corresponding to the task being mapped to. - Returns ------- y_hat : ndarray of shape [n_samples] @@ -332,52 +390,42 @@ def predict(self, X, task_id): class UncertaintyForest(LifelongClassificationForest): """ A class used to represent an uncertainty forest. - Parameters ---------- n_estimators : int, default=100 The number of trees in the UncertaintyForest - kappa : float, default=np.inf The coefficient for finite sample correction. If set to the default value, finite sample correction is not performed. - max_depth : int, default=30 The maximum depth of a tree in the UncertaintyForest - tree_construction_proportion : float, default = 0.67 The proportions of the input data set aside to train each decision tree. The remainder of the data is used to fill in voting posteriors. - Attributes ---------- default_transformer_class : TreeClassificationTransformer The class of transformer to which the forest defaults if None is provided in any of the functions which add or set transformers. - default_transformer_kwargs : dict A dictionary with keys of type string and values of type obj corresponding to the given string kwarg. This determines to which type of transformer the forest defaults if None is provided in any of the functions which add or set transformers. 
- default_voter_class : TreeClassificationVoter The class of voter to which the forest defaults if None is provided in any of the functions which add or set voters. - default_voter_kwargs : dict A dictionary with keys of type string and values of type obj corresponding to the given string kwarg. This determines to which type of voter the forest defaults if None is provided in any of the functions which add or set voters. - default_decider_class : SimpleArgmaxAverage The class of decider to which the forest defaults if None is provided in any of the functions which add or set deciders. - default_decider_kwargs : dict A dictionary with keys of type string and values of type obj corresponding to the given string kwarg. This determines to which type of decider the @@ -402,15 +450,12 @@ def __init__( def fit(self, X, y): """ fits forest to data X with labels y - Parameters ---------- X : array of shape [n_samples, n_features] The data that will be trained on - y : array of shape [n_samples] The label for cluster membership of the given data - Returns ------- self : UncertaintyForest @@ -422,12 +467,10 @@ def fit(self, X, y): def predict_proba(self, X): """ estimates class posteriors for each example in input data X. - Parameters ---------- X : array of shape [n_samples, n_features] The data whose posteriors we are estimating. - Returns ------- y_proba_hat : ndarray of shape [n_samples, n_classes] @@ -438,12 +481,10 @@ def predict_proba(self, X): def predict(self, X): """ predicts class labels for each example in input data X. - Parameters ---------- X : array of shape [n_samples, n_features] The data on which we are performing inference. 
- Returns ------- y_hat : ndarray of shape [n_samples] diff --git a/proglearn/progressive_learner.py b/proglearn/progressive_learner.py index e4d5cd4aab..8bfa5e0819 100755 --- a/proglearn/progressive_learner.py +++ b/proglearn/progressive_learner.py @@ -13,42 +13,35 @@ class ProgressiveLearner(BaseProgressiveLearner): A (mostly) internal class for progressive learning. Most users who desire to utilize ProgLearn should use the classes defined in {network, forest}.py instead of this class. - Parameters ---------- default_transformer_class : BaseTransformer, default=None The class of transformer to which the progressive learner defaults if None is provided in any of the functions which add or set transformers. - default_transformer_kwargs : dict, default=None A dictionary with keys of type string and values of type obj corresponding to the given string kwarg. This determines to which type of transformer the progressive learner defaults if None is provided in any of the functions which add or set transformers. - default_voter_class : BaseVoter, default=None The class of voter to which the progressive learner defaults if None is provided in any of the functions which add or set voters. - default_voter_kwargs : dict, default=None A dictionary with keys of type string and values of type obj corresponding to the given string kwarg. This determines to which type of voter the progressive learner defaults if None is provided in any of the functions which add or set voters. - default_decider_class : BaseDecider, default=None The class of decider to which the progressive learner defaults if None is provided in any of the functions which add or set deciders. - default_decider_kwargs : dict, default=None A dictionary with keys of type string and values of type obj corresponding to the given string kwarg. This determines to which type of decider the progressive learner defaults if None is provided in any of the functions which add or set deciders. 
- Attributes ---------- task_id_to_X : dict @@ -56,66 +49,54 @@ class ProgressiveLearner(BaseProgressiveLearner): and values of type ndarray corresponding to the input data matrix X. This dictionary thus maps input data matrix to the task where posteriors are to be estimated. - task_id_to_y : dict A dictionary with keys of type obj corresponding to task ids and values of type ndarray corresponding to output data matrix y. This dictionary thus maps output data matrix to the task where posteriors are to be estimated. - transformer_id_to_X : dict A dictionary with keys of type obj corresponding to transformer ids and values of type ndarray corresponding to the output data matrix X. This dictionary thus maps input data matrix to a particular transformer. - transformer_id_to_y : dict A dictionary with keys of type obj corresponding to transformer ids and values of type ndarray corresponding to the output data matrix y. This dictionary thus maps output data matrix to a particular transformer. - transformer_id_to_transformers : dict A dictionary with keys of type obj corresponding to transformer ids and values of type obj corresponding to a transformer. This dictionary thus maps transformer ids to the corresponding transformers. - - task_id_to_trasnformer_id_to_voters : dict + task_id_to_transformer_id_to_voters : dict A nested dictionary with outer key of type obj, corresponding to task ids inner key of type obj, corresponding to transformer ids, and values of type obj, corresponding to a voter. This dictionary thus maps voters to a corresponding transformer assigned to a particular task. - task_id_to_decider : dict A dictionary with keys of type obj, corresponding to task ids, and values of type obj corresponding to a decider. This dictionary thus maps deciders to a particular task. - task_id_to_decider_class : dict A dictionary with keys of type obj corresponding to task ids and values of type obj corresponding to a decider class. 
This dictionary thus maps decider classes to a particular task id. - task_id_to_voter_class : dict A dictionary with keys of type obj corresponding to task ids and values of type obj corresponding to a voter class. This dictionary thus maps voter classes to a particular task id. - task_id_to_voter_kwargs : dict A dictionary with keys of type obj corresponding to task ids and values of type obj corresponding to a voter kwargs. This dictionary thus maps voter kwargs to a particular task id. - task_id_to_decider_kwargs : dict A dictionary with keys of type obj corresponding to task ids and values of type obj corresponding to a decider kwargs. This dictionary thus maps decider kwargs to a particular task id. - task_id_to_bag_id_to_voter_data_idx : dict A nested dictionary with outer keys of type obj corresponding to task ids inner keys of type obj corresponding to bag ids and values of type obj corresponding to voter data indices. This dictionary thus maps voter data indices to particular bags for particular tasks. - task_id_to_decider_idx : dict A dictionary with keys of type obj corresponding to task ids and values of type obj corresponding to decider indices. 
This dictionary @@ -173,6 +154,9 @@ def _append_transformer(self, transformer_id, transformer): else: self.transformer_id_to_transformers[transformer_id] = [transformer] + def _replace_transformer(self, transformer_id, transformer): + self.transformer_id_to_transformers[transformer_id] = [transformer] + def _append_voter(self, transformer_id, task_id, voter): if task_id in list(self.task_id_to_transformer_id_to_voters.keys()): if transformer_id in list( @@ -192,10 +176,21 @@ def _append_voter(self, transformer_id, task_id, voter): def _append_voter_data_idx(self, task_id, bag_id, voter_data_idx): if task_id in list(self.task_id_to_bag_id_to_voter_data_idx.keys()): + self.task_id_to_bag_id_to_voter_data_idx[task_id][bag_id] = voter_data_idx else: self.task_id_to_bag_id_to_voter_data_idx[task_id] = {bag_id: voter_data_idx} + def _update_voter_data_idx(self, task_id, bag_id, voter_data_idx): + if task_id in list(self.task_id_to_bag_id_to_voter_data_idx.keys()): + prev = self.task_id_to_bag_id_to_voter_data_idx[task_id][bag_id] + new = voter_data_idx + self.task_id_to_bag_id_to_voter_data_idx[task_id][bag_id] = np.append( + prev, new + ) + else: + self.task_id_to_bag_id_to_voter_data_idx[task_id] = {bag_id: voter_data_idx} + def _append_decider_idx(self, task_id, decider_idx): self.task_id_to_decider_idx[task_id] = decider_idx @@ -218,6 +213,90 @@ def _bifurcate_decider_idxs(self, ra, transformer_voter_decider_split): ) return first_idx, second_idx + def _update_transformer( + self, + X, + y, + transformer_data_proportion, + transformer_voter_data_idx, + transformer_id, + num_transformers, + transformer_class, + transformer_kwargs, + backward_task_ids, + classes=None, + decider_kwargs=None, + ): + + if transformer_id not in list(self.task_id_to_X.keys()): + self.transformer_id_to_X[transformer_id] = X + if transformer_id not in list(self.task_id_to_y.keys()): + self.transformer_id_to_y[transformer_id] = y + + backward_task_ids = ( + backward_task_ids if 
backward_task_ids is not None else self.get_task_ids() + ) + + # for all transformers referring to specified task + counter = 0 + for transformer in self.transformer_id_to_transformers[transformer_id]: + + # Check data and assign data for training + + if X is not None: + n = len(X) + elif y is not None: + n = len(y) + else: + n = None + if n is not None: + transformer_data_idx = np.random.choice( + transformer_voter_data_idx, + int(transformer_data_proportion * n), + replace=False, + ) + else: + transformer_data_idx = None + + X2 = ( + self.transformer_id_to_X[transformer_id] + if transformer_id in list(self.transformer_id_to_X.keys()) + else self.task_id_to_X[transformer_id] + ) + y2 = ( + self.transformer_id_to_y[transformer_id] + if transformer_id in list(self.transformer_id_to_y.keys()) + else self.task_id_to_y[transformer_id] + ) + + if transformer_data_idx is not None: + X2, y2 = X2[transformer_data_idx], y2[transformer_data_idx] + + transformer.transformer_.partial_fit(X2, y2, classes) + + voter_data_idx = np.delete( + transformer_voter_data_idx, + np.isin(transformer_voter_data_idx, transformer_data_idx), + ) + + self._update_voter_data_idx( + task_id=transformer_id, + bag_id=counter, + voter_data_idx=voter_data_idx, + ) + counter = counter + 1 + + for existing_task_id in np.intersect1d(backward_task_ids, self.get_task_ids()): + self.set_voter(transformer_id=transformer_id, task_id=existing_task_id) + self.set_decider( + task_id=existing_task_id, + transformer_ids=list( + self.task_id_to_transformer_id_to_voters[existing_task_id].keys() + ), + ) + + return self + def _add_transformer( self, X, @@ -229,13 +308,16 @@ def _add_transformer( transformer_class, transformer_kwargs, backward_task_ids, + classes, ): + if transformer_id is None: transformer_id = len(self.get_transformer_ids()) backward_task_ids = ( backward_task_ids if backward_task_ids is not None else self.get_task_ids() ) + transformer_voter_data_idx = ( range(len(X)) if transformer_voter_data_idx 
is None @@ -261,6 +343,27 @@ def _add_transformer( int(transformer_data_proportion * n), replace=False, ) + if classes is not None: + # raise ValueError to avoid infinite loop + if int(transformer_data_proportion * n) < len(classes): + raise ValueError( + "length of X times transformer_data_proportion must exceed number of classes" + ) + ensure_all_classes = False + while ensure_all_classes is False: + transformer_data_idx = np.random.choice( + transformer_voter_data_idx, + int(transformer_data_proportion * n), + replace=False, + ) + y = ( + self.transformer_id_to_y[transformer_id] + if transformer_id in list(self.transformer_id_to_y.keys()) + else self.task_id_to_y[transformer_id] + ) + if len(np.unique(y[transformer_data_idx])) == len(classes): + ensure_all_classes = True + else: transformer_data_idx = None self.set_transformer( @@ -297,7 +400,6 @@ def set_transformer( transformer_class=None, transformer_kwargs=None, ): - if transformer_id is None: transformer_id = len(self.get_transformer_ids()) @@ -362,10 +464,6 @@ def set_voter( bag_id=None, ): - # Type check X - - # Type check y - if task_id is None: task_id = len(self.get_task_ids()) @@ -462,6 +560,7 @@ def set_decider( self.task_id_to_decider[task_id] = decider_class(**decider_kwargs) decider_idx = self.task_id_to_decider_idx[task_id] + self.task_id_to_decider[task_id].fit( X[decider_idx], y[decider_idx], @@ -483,26 +582,23 @@ def add_transformer( transformer_class=None, transformer_kwargs=None, backward_task_ids=None, + classes=None, ): """ Adds a transformer to the progressive learner and trains the voters and deciders from this new transformer to the specified backward_task_ids. - Parameters ---------- X : ndarray Input data matrix. - y : ndarray Output (response) data matrix. - transformer_data_proportion : float, default=1.0 The proportion of the data set aside to train the transformer. The remainder of the data is used to train voters. 
This is used in the case that you are using a bagging algorithm and want the various components in that bagging ensemble to train on disjoint subsets of the data. This parameter is mostly for internal use. - transformer_voter_data_idx : ndarray, default=None A 1d array of type int used to specify the aggregate indices of the input data used to train the transformers and voters. This is used in the @@ -510,25 +606,19 @@ def add_transformer( transformers or voters (e.g. X and/or y contains decider training data disjoint from the transformer/voter data). This parameter is mostly for internal use. - transformer_id : obj, default=None The id corresponding to the transformer being added. - num_transformers : int, default=1 The number of transformers to add corresponding to the given inputs. - transformer_class : BaseTransformer, default=None The class of the transformer(s) being added. - transformer_kwargs : dict, default=None A dictionary with keys of type string and values of type obj corresponding to the given string kwarg. This determines the kwargs of the transformer(s) being added. - backward_task_ids : ndarray, default=None A 1d array of type obj used to specify to which existing task voters and deciders will be trained from the transformer(s) being added. - Returns ------- self : ProgressiveLearner @@ -544,6 +634,7 @@ def add_transformer( transformer_class=transformer_class, transformer_kwargs=transformer_kwargs, backward_task_ids=backward_task_ids, + classes=classes, ) def add_task( @@ -561,7 +652,9 @@ def add_task( decider_kwargs=None, backward_task_ids=None, forward_transformer_ids=None, + classes=None, ): + """ Adds a task to the progressive learner. Optionally trains one or more transformer from the input data (if num_transformers > 0), adds voters @@ -570,18 +663,14 @@ def add_task( specified in forward_transformer_ids (and from the newly added transformer(s) corresponding to the input task_id if num_transformers > 0) to the new task_id. 
- Parameters ---------- X : ndarray Input data matrix. - y : ndarray Output (response) data matrix. - task_id : obj, default=None The id corresponding to the task being added. - transformer_voter_decider_split : ndarray, default=[0.67, 0.33, 0] A 1d array of length 3. The 0th index indicates the proportions of the input data used to train the (optional) newly added transformer(s) corresponding to @@ -596,43 +685,33 @@ def add_task( proportion of the data set aside to train the decider - these indices are saved internally and will be used to train all further deciders corresponding to this task for all function calls. - num_transformers : int, default=1 The number of transformers to add corresponding to the given inputs. - transformer_class : BaseTransformer, default=None The class of the transformer(s) being added. - transformer_kwargs : dict, default=None A dictionary with keys of type string and values of type obj corresponding to the given string kwarg. This determines the kwargs of the transformer(s) being added. - voter_class : BaseVoter, default=None The class of the voter(s) being added. - voter_kwargs : dict, default=None A dictionary with keys of type string and values of type obj corresponding to the given string kwarg. This determines the kwargs of the voter(s) being added. - decider_class : BaseDecider, default=None The class of the decider(s) being added. - decider_kwargs : dict, default=None A dictionary with keys of type string and values of type obj corresponding to the given string kwarg. This determines the kwargs of the decider(s) being added. - backward_task_ids : ndarray, default=None A 1d array of type obj used to specify to which existing task voters and deciders will be trained from the transformer(s) being added. - foward_transformer_ids : ndarray, default=None A 1d array of type obj used to specify from which existing transformer(s) voters and deciders will be trained to the new task. 
If num_transformers > 0, the input task_id corresponding to the task being added is automatically appended to this 1d array. - Returns ------- self : ProgressiveLearner @@ -650,11 +729,13 @@ def add_task( transformer_voter_data_idx, decider_idx = self._bifurcate_decider_idxs( range(len(X)), transformer_voter_decider_split ) + self._append_decider_idx(task_id, decider_idx) # add new transformer and train voters and decider # from new transformer to previous tasks if num_transformers > 0: + self._add_transformer( X, y, @@ -667,6 +748,7 @@ def add_task( transformer_class=transformer_class, transformer_kwargs=transformer_kwargs, backward_task_ids=backward_task_ids, + classes=classes, ) # train voters and decider from previous (and current) transformers to new task @@ -690,6 +772,7 @@ def add_task( transformer_ids = np.concatenate([forward_transformer_ids, task_id]) else: transformer_ids = self.get_transformer_ids() + self.set_decider( task_id=task_id, transformer_ids=transformer_ids, @@ -699,33 +782,166 @@ def add_task( return self + def update_task( + self, + X, + y, + classes=None, + task_id=None, + transformer_voter_decider_split=[0.67, 0.33, 0], + num_transformers=1, + transformer_class=None, + transformer_kwargs=None, + voter_class=None, + voter_kwargs=None, + decider_class=None, + decider_kwargs=None, + backward_task_ids=None, + forward_transformer_ids=None, + ): + + """ + Updates a task for the progressive learner. Concatenates new data to existing + data for specified task and partial fits transformers. Updates voters and decider + from updated transformers. + Parameters + ---------- + X : ndarray + Input data matrix. + y : ndarray + Output (response) data matrix. + task_id : obj, default=None + The id corresponding to the task being added. + transformer_voter_decider_split : ndarray, default=[0.67, 0.33, 0] + A 1d array of length 3. 
The 0th index indicates the proportions of the input + data used to train the (optional) newly added transformer(s) corresponding to + the task_id provided in this function call. The 1st index indicates the proportion of + the data set aside to train the voter(s) from these (optional) newly added + transformer(s) to the task_id provided in this function call. For all other tasks, + the aggregate transformer and voter data pairs from those tasks are used to train + the voter(s) from these (optional) newly added transformer(s) to those tasks; + for all other transformers, the aggregate transformer and voter data provided in + this function call is used to train the voter(s) from those transformers to + the task_id provided in this function call. The 2nd index indicates the + proportion of the data set aside to train the decider - these indices are saved + internally and will be used to train all further deciders corresponding to this + task for all function calls. + num_transformers : int, default=1 + The number of transformers to add corresponding to the given inputs. + transformer_class : BaseTransformer, default=None + The class of the transformer(s) being added. + transformer_kwargs : dict, default=None + A dictionary with keys of type string and values of type obj corresponding + to the given string kwarg. This determines the kwargs of the transformer(s) + being added. + voter_class : BaseVoter, default=None + The class of the voter(s) being added. + voter_kwargs : dict, default=None + A dictionary with keys of type string and values of type obj corresponding + to the given string kwarg. This determines the kwargs of the voter(s) + being added. + decider_class : BaseDecider, default=None + The class of the decider(s) being added. + decider_kwargs : dict, default=None + A dictionary with keys of type string and values of type obj corresponding + to the given string kwarg. This determines the kwargs of the decider(s) + being added. 
+ backward_task_ids : ndarray, default=None + A 1d array of type obj used to specify to which existing task voters and deciders + will be trained from the transformer(s) being added. + foward_transformer_ids : ndarray, default=None + A 1d array of type obj used to specify from which existing transformer(s) voters and + deciders will be trained to the new task. If num_transformers > 0, the input task_id + corresponding to the task being added is automatically appended to this 1d array. + Returns + ------- + self : ProgressiveLearner + The object itself. + """ + + if task_id is None: + print("Error: No Task ID inputted") + return self + # come up with something that has fewer collision + self.task_id_to_transformer_id_to_voters[task_id] = {} + + self.task_id_to_X[task_id] = np.concatenate( + (self.task_id_to_X[task_id], X), axis=0 + ) + self.task_id_to_y[task_id] = np.concatenate( + (self.task_id_to_y[task_id], y), axis=0 + ) + + # split into transformer/voter and decider data + + transformer_voter_data_idx, decider_idx = self._bifurcate_decider_idxs( + range(len(X)), transformer_voter_decider_split + ) + transformer_voter_data_idx += len(self.task_id_to_X[task_id]) - len(X) + decider_idx += len(self.task_id_to_X[task_id]) - len(X) + + self._append_decider_idx(task_id, decider_idx) + + # updates transformer and train voters and decider + # from updated transformer to previous tasks + if num_transformers > 0: + self._update_transformer( + X, + y, + classes=classes, + transformer_data_proportion=transformer_voter_decider_split[0] + if transformer_voter_decider_split + else 1, + transformer_voter_data_idx=transformer_voter_data_idx, + transformer_id=task_id, + num_transformers=num_transformers, + transformer_class=transformer_class, + transformer_kwargs=transformer_kwargs, + backward_task_ids=backward_task_ids, + decider_kwargs=decider_kwargs, + ) + + # train voters and decider + for transformer_id in self.get_transformer_ids(): + self.set_voter( + 
transformer_id=transformer_id, + task_id=task_id, + voter_class=voter_class, + voter_kwargs=voter_kwargs, + ) + self.set_decider( + task_id=task_id, + transformer_ids=self.get_transformer_ids(), + decider_class=decider_class, + decider_kwargs=decider_kwargs, + ) + return self + def predict(self, X, task_id, transformer_ids=None): """ predicts labels under task_id for each example in input data X using the given transformer_ids. - Parameters ---------- X : ndarray The input data matrix. - task_id : obj The id corresponding to the task being mapped to. - transformer_ids : list, default=None The list of transformer_ids through which a user would like to send X (which will be pipelined with their corresponding voters) to make an inference prediction. - Returns ------- y_hat : ndarray of shape [n_samples] predicted class label per example """ + if self.task_id_to_decider == {}: raise NotFittedError decider = self.task_id_to_decider[task_id] + return decider.predict(X, transformer_ids=transformer_ids) @@ -742,20 +958,16 @@ def predict_proba(self, X, task_id, transformer_ids=None): """ predicts posteriors under task_id for each example in input data X using the given transformer_ids. - Parameters ---------- X : ndarray The input data matrix. - task_id : obj The id corresponding to the task being mapped to. - transformer_ids : list, default=None The list of transformer_ids through which a user would like to send X (which will be pipelined with their corresponding voters) to estimate posteriors. - Returns ------- y_proba_hat : ndarray of shape [n_samples, n_classes] diff --git a/proglearn/sims/spiral_sim.py b/proglearn/sims/spiral_sim.py index cf5c4a1851..42ed74a612 100644 --- a/proglearn/sims/spiral_sim.py +++ b/proglearn/sims/spiral_sim.py @@ -1,90 +1,90 @@ -import numpy as np - - -def generate_spirals( - n_samples, - n_class=2, - noise=0.3, - random_state=None, -): - """ - Generate 2-dimensional Gaussian XOR distribution. 
- (Classic XOR problem but each point is the - center of a Gaussian blob distribution) - - Parameters - ---------- - - n_samples : int - Total number of points divided among the four - clusters with equal probability. - - n_class : array of shape [n_centers], optional (default=2) - Number of class for the spiral simulation. - - noise : float, optional (default=0.3) - Parameter controlling the spread of each class. - - random_state : int, RandomState instance, default=None - Determines random number generation for dataset creation. Pass an int - for reproducible output across multiple function calls. - - - Returns - ------- - - X : array of shape [n_samples, 2] - The generated samples. - y : array of shape [n_samples] - The integer labels for cluster membership of each sample. - """ - - if random_state != None: - np.random.seed(random_state) - - X = [] - y = [] - - if n_class == 2: - turns = 2 - elif n_class == 3: - turns = 2.5 - elif n_class == 5: - turns = 3.5 - elif n_class == 7: - turns = 4.5 - else: - raise ValueError("sorry, can't currently surpport %s classes " % n_class) - - mvt = np.random.multinomial(n_samples, 1 / n_class * np.ones(n_class)) - - if n_class == 2: - r = np.random.uniform(0, 1, size=int(n_samples / n_class)) - r = np.sort(r) - t = np.linspace( - 0, np.pi * 4 * turns / n_class, int(n_samples / n_class) - ) + np.random.normal(0, noise, int(n_samples / n_class)) - dx = r * np.cos(t) - dy = r * np.sin(t) - - X.append(np.vstack([dx, dy]).T) - X.append(np.vstack([-dx, -dy]).T) - y += [0] * int(n_samples / n_class) - y += [1] * int(n_samples / n_class) - else: - for j in range(1, n_class + 1): - r = np.linspace(0.01, 1, int(mvt[j - 1])) - t = np.linspace( - (j - 1) * np.pi * 4 * turns / n_class, - j * np.pi * 4 * turns / n_class, - int(mvt[j - 1]), - ) + np.random.normal(0, noise, int(mvt[j - 1])) - - dx = r * np.cos(t) - dy = r * np.sin(t) - - dd = np.vstack([dx, dy]).T - X.append(dd) - y += [j - 1] * int(mvt[j - 1]) - - return np.vstack(X), 
np.array(y).astype(int) +import numpy as np + + +def generate_spirals( + n_samples, + n_class=2, + noise=0.3, + random_state=None, +): + """ + Generate 2-dimensional Gaussian XOR distribution. + (Classic XOR problem but each point is the + center of a Gaussian blob distribution) + + Parameters + ---------- + + n_samples : int + Total number of points divided among the four + clusters with equal probability. + + n_class : array of shape [n_centers], optional (default=2) + Number of class for the spiral simulation. + + noise : float, optional (default=0.3) + Parameter controlling the spread of each class. + + random_state : int, RandomState instance, default=None + Determines random number generation for dataset creation. Pass an int + for reproducible output across multiple function calls. + + + Returns + ------- + + X : array of shape [n_samples, 2] + The generated samples. + y : array of shape [n_samples] + The integer labels for cluster membership of each sample. + """ + + if random_state != None: + np.random.seed(random_state) + + X = [] + y = [] + + if n_class == 2: + turns = 2 + elif n_class == 3: + turns = 2.5 + elif n_class == 5: + turns = 3.5 + elif n_class == 7: + turns = 4.5 + else: + raise ValueError("sorry, can't currently surpport %s classes " % n_class) + + mvt = np.random.multinomial(n_samples, 1 / n_class * np.ones(n_class)) + + if n_class == 2: + r = np.random.uniform(0, 1, size=int(n_samples / n_class)) + r = np.sort(r) + t = np.linspace( + 0, np.pi * 4 * turns / n_class, int(n_samples / n_class) + ) + np.random.normal(0, noise, int(n_samples / n_class)) + dx = r * np.cos(t) + dy = r * np.sin(t) + + X.append(np.vstack([dx, dy]).T) + X.append(np.vstack([-dx, -dy]).T) + y += [0] * int(n_samples / n_class) + y += [1] * int(n_samples / n_class) + else: + for j in range(1, n_class + 1): + r = np.linspace(0.01, 1, int(mvt[j - 1])) + t = np.linspace( + (j - 1) * np.pi * 4 * turns / n_class, + j * np.pi * 4 * turns / n_class, + int(mvt[j - 1]), + ) + 
np.random.normal(0, noise, int(mvt[j - 1])) + + dx = r * np.cos(t) + dy = r * np.sin(t) + + dd = np.vstack([dx, dy]).T + X.append(dd) + y += [j - 1] * int(mvt[j - 1]) + + return np.vstack(X), np.array(y).astype(int) diff --git a/proglearn/tests/test_forest.py b/proglearn/tests/test_forest.py index 11927409b9..d895861eb4 100644 --- a/proglearn/tests/test_forest.py +++ b/proglearn/tests/test_forest.py @@ -99,6 +99,29 @@ def test_predict_proba(self): u2 = l2f.predict_proba(np.array([0]).reshape(1, -1), task_id=0) assert np.array_equiv(u1, u2) + # def test_update_task(self): + # np.random.seed(1) + + # l2f = LifelongClassificationForest() + + # X = np.concatenate((np.zeros(100), np.ones(100))).reshape(-1, 1) + # y = np.concatenate((np.zeros(100), np.ones(100))) + + # l2f.add_task(X, y) + # u1 = l2f.predict_proba(np.array([0]).reshape(1, -1), task_id=0) + # u2 = l2f.predict_proba(np.array([1]).reshape(1, -1), task_id=0) + + # X2 = np.concatenate((np.zeros(100), np.ones(100))).reshape(-1, 1) + # y2 = np.concatenate((np.zeros(100), np.ones(100))) + + # X3 = np.concatenate((X, X2)) + # y3 = np.concatenate((y, y2)) + + # l2f.update_task(X2, y2, task_id=0) + + # assert np.array_equiv(l2f.task_id_to_X[0], X3) + # assert np.array_equiv(l2f.task_id_to_y[0], y3) + class TestUncertaintyForest: def test_initialize(self): diff --git a/proglearn/tests/test_system.py b/proglearn/tests/test_system.py index bfa88c7061..ba8349df5c 100644 --- a/proglearn/tests/test_system.py +++ b/proglearn/tests/test_system.py @@ -1,136 +1,136 @@ -import pytest -import numpy as np - -from proglearn.progressive_learner import ProgressiveLearner -from proglearn.deciders import SimpleArgmaxAverage -from proglearn.transformers import ( - TreeClassificationTransformer, - NeuralClassificationTransformer, -) -from proglearn.voters import TreeClassificationVoter, KNNClassificationVoter - - -def generate_2d_rotation(theta=0, acorn=None): - if acorn is not None: - np.random.seed(acorn) - - R = 
np.array([[np.cos(theta), np.sin(theta)], [-np.sin(theta), np.cos(theta)]]) - - return R - - -def generate_gaussian_parity( - n, mean=np.array([-1, -1]), cov_scale=1, angle_params=None, k=1, acorn=None -): - if acorn is not None: - np.random.seed(acorn) - - d = len(mean) - - if mean[0] == -1 and mean[1] == -1: - mean = mean + 1 / 2**k - - mnt = np.random.multinomial(n, 1 / (4**k) * np.ones(4**k)) - cumsum = np.cumsum(mnt) - cumsum = np.concatenate(([0], cumsum)) - - Y = np.zeros(n) - X = np.zeros((n, d)) - - for i in range(2**k): - for j in range(2**k): - temp = np.random.multivariate_normal( - mean, cov_scale * np.eye(d), size=mnt[i * (2**k) + j] - ) - temp[:, 0] += i * (1 / 2 ** (k - 1)) - temp[:, 1] += j * (1 / 2 ** (k - 1)) - - X[cumsum[i * (2**k) + j] : cumsum[i * (2**k) + j + 1]] = temp - - if i % 2 == j % 2: - Y[cumsum[i * (2**k) + j] : cumsum[i * (2**k) + j + 1]] = 0 - else: - Y[cumsum[i * (2**k) + j] : cumsum[i * (2**k) + j + 1]] = 1 - - if d == 2: - if angle_params is None: - angle_params = np.random.uniform(0, 2 * np.pi) - - R = generate_2d_rotation(angle_params) - X = X @ R - - else: - raise ValueError("d=%i not implemented!" 
% (d)) - - return X, Y.astype(int) - - -class TestSystem: - def test_nxor(self): - # tests proglearn on xor nxor simulation data - np.random.seed(12345) - - reps = 10 - errors = np.zeros((4, reps), dtype=float) - - for ii in range(reps): - default_transformer_class = TreeClassificationTransformer - default_transformer_kwargs = {"kwargs": {"max_depth": 30}} - - default_voter_class = TreeClassificationVoter - default_voter_kwargs = {} - - default_decider_class = SimpleArgmaxAverage - default_decider_kwargs = {"classes": np.arange(2)} - progressive_learner = ProgressiveLearner( - default_transformer_class=default_transformer_class, - default_transformer_kwargs=default_transformer_kwargs, - default_voter_class=default_voter_class, - default_voter_kwargs=default_voter_kwargs, - default_decider_class=default_decider_class, - default_decider_kwargs=default_decider_kwargs, - ) - - xor, label_xor = generate_gaussian_parity( - 750, cov_scale=0.1, angle_params=0 - ) - test_xor, test_label_xor = generate_gaussian_parity( - 1000, cov_scale=0.1, angle_params=0 - ) - - nxor, label_nxor = generate_gaussian_parity( - 750, cov_scale=0.1, angle_params=np.pi / 2 - ) - test_nxor, test_label_nxor = generate_gaussian_parity( - 1000, cov_scale=0.1, angle_params=np.pi / 2 - ) - - progressive_learner.add_task(xor, label_xor, num_transformers=10) - progressive_learner.add_task(nxor, label_nxor, num_transformers=10) - - uf_task1 = progressive_learner.predict( - test_xor, transformer_ids=[0], task_id=0 - ) - l2f_task1 = progressive_learner.predict(test_xor, task_id=0) - uf_task2 = progressive_learner.predict( - test_nxor, transformer_ids=[1], task_id=1 - ) - l2f_task2 = progressive_learner.predict(test_nxor, task_id=1) - - errors[0, ii] = 1 - np.mean(uf_task1 == test_label_xor) - errors[1, ii] = 1 - np.mean(l2f_task1 == test_label_xor) - errors[2, ii] = 1 - np.mean(uf_task2 == test_label_nxor) - errors[3, ii] = 1 - np.mean(l2f_task2 == test_label_nxor) - - bte = np.mean(errors[0,]) / np.mean( 
- errors[ - 1, - ] - ) - fte = np.mean(errors[2,]) / np.mean( - errors[ - 3, - ] - ) - - assert bte > 1 and fte > 1 +import pytest +import numpy as np + +from proglearn.progressive_learner import ProgressiveLearner +from proglearn.deciders import SimpleArgmaxAverage +from proglearn.transformers import ( + TreeClassificationTransformer, + NeuralClassificationTransformer, +) +from proglearn.voters import TreeClassificationVoter, KNNClassificationVoter + + +def generate_2d_rotation(theta=0, acorn=None): + if acorn is not None: + np.random.seed(acorn) + + R = np.array([[np.cos(theta), np.sin(theta)], [-np.sin(theta), np.cos(theta)]]) + + return R + + +def generate_gaussian_parity( + n, mean=np.array([-1, -1]), cov_scale=1, angle_params=None, k=1, acorn=None +): + if acorn is not None: + np.random.seed(acorn) + + d = len(mean) + + if mean[0] == -1 and mean[1] == -1: + mean = mean + 1 / 2**k + + mnt = np.random.multinomial(n, 1 / (4**k) * np.ones(4**k)) + cumsum = np.cumsum(mnt) + cumsum = np.concatenate(([0], cumsum)) + + Y = np.zeros(n) + X = np.zeros((n, d)) + + for i in range(2**k): + for j in range(2**k): + temp = np.random.multivariate_normal( + mean, cov_scale * np.eye(d), size=mnt[i * (2**k) + j] + ) + temp[:, 0] += i * (1 / 2 ** (k - 1)) + temp[:, 1] += j * (1 / 2 ** (k - 1)) + + X[cumsum[i * (2**k) + j] : cumsum[i * (2**k) + j + 1]] = temp + + if i % 2 == j % 2: + Y[cumsum[i * (2**k) + j] : cumsum[i * (2**k) + j + 1]] = 0 + else: + Y[cumsum[i * (2**k) + j] : cumsum[i * (2**k) + j + 1]] = 1 + + if d == 2: + if angle_params is None: + angle_params = np.random.uniform(0, 2 * np.pi) + + R = generate_2d_rotation(angle_params) + X = X @ R + + else: + raise ValueError("d=%i not implemented!" 
% (d)) + + return X, Y.astype(int) + + +class TestSystem: + def test_nxor(self): + # tests proglearn on xor nxor simulation data + np.random.seed(12345) + + reps = 10 + errors = np.zeros((4, reps), dtype=float) + + for ii in range(reps): + default_transformer_class = TreeClassificationTransformer + default_transformer_kwargs = {"kwargs": {"max_depth": 30}} + + default_voter_class = TreeClassificationVoter + default_voter_kwargs = {} + + default_decider_class = SimpleArgmaxAverage + default_decider_kwargs = {"classes": np.arange(2)} + progressive_learner = ProgressiveLearner( + default_transformer_class=default_transformer_class, + default_transformer_kwargs=default_transformer_kwargs, + default_voter_class=default_voter_class, + default_voter_kwargs=default_voter_kwargs, + default_decider_class=default_decider_class, + default_decider_kwargs=default_decider_kwargs, + ) + + xor, label_xor = generate_gaussian_parity( + 750, cov_scale=0.1, angle_params=0 + ) + test_xor, test_label_xor = generate_gaussian_parity( + 1000, cov_scale=0.1, angle_params=0 + ) + + nxor, label_nxor = generate_gaussian_parity( + 750, cov_scale=0.1, angle_params=np.pi / 2 + ) + test_nxor, test_label_nxor = generate_gaussian_parity( + 1000, cov_scale=0.1, angle_params=np.pi / 2 + ) + + progressive_learner.add_task(xor, label_xor, num_transformers=10) + progressive_learner.add_task(nxor, label_nxor, num_transformers=10) + + uf_task1 = progressive_learner.predict( + test_xor, transformer_ids=[0], task_id=0 + ) + l2f_task1 = progressive_learner.predict(test_xor, task_id=0) + uf_task2 = progressive_learner.predict( + test_nxor, transformer_ids=[1], task_id=1 + ) + l2f_task2 = progressive_learner.predict(test_nxor, task_id=1) + + errors[0, ii] = 1 - np.mean(uf_task1 == test_label_xor) + errors[1, ii] = 1 - np.mean(l2f_task1 == test_label_xor) + errors[2, ii] = 1 - np.mean(uf_task2 == test_label_nxor) + errors[3, ii] = 1 - np.mean(l2f_task2 == test_label_nxor) + + bte = np.mean(errors[0,]) / np.mean( 
+ errors[ + 1, + ] + ) + fte = np.mean(errors[2,]) / np.mean( + errors[ + 3, + ] + ) + + assert bte > 1 and fte > 1