diff --git a/tests/conftest.py b/tests/conftest.py index 357619d..b634189 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -11,10 +11,11 @@ @pytest.fixture(autouse=True, scope="session") def generated_test_data(): seed = 2024 + max_top_k = 5 x, y = make_multilabel_classification( n_samples=100000, n_features=100, - n_classes=20, + n_classes=50, n_labels=3, length=50, allow_unlabeled=True, @@ -31,12 +32,24 @@ def generated_test_data(): ) clf = MultiOutputClassifier(LogisticRegression()).fit(x_train, y_train) - def clf_predict(clf, x): + def clf_predict(clf, x, sparsify=False): """ Process the output of a multioutput classifier to get the marginal probabilities of each label (class). """ y_proba = clf.predict_proba(x) - return np.array(y_proba)[:, :, 1].transpose() + + # Convert the output of MultiOutputClassifier, which contains the marginal probabilities for both P(y=0) and P(y=1), to a matrix of just P(y=1) + y_proba = np.array(y_proba)[:, :, 1].transpose() + + # Sparsify the matrix + if sparsify: + top_k_thr = -np.partition(-y_proba, max_top_k, axis=1)[:, max_top_k] + top_k_thr = top_k_thr.reshape((-1,)) + top_k_thr[top_k_thr >= 0.1] = 0.1 + y_proba[y_proba < top_k_thr[:, None]] = 0 + assert ((y_proba > 0).sum(axis=1) >= max_top_k).all() + + return y_proba y_proba_train = clf_predict(clf, x_train) y_proba_val = clf_predict(clf, x_val) diff --git a/tests/test_frank_wolfe.py b/tests/test_frank_wolfe.py index b6790df..5de45d4 100644 --- a/tests/test_frank_wolfe.py +++ b/tests/test_frank_wolfe.py @@ -9,16 +9,22 @@ def _run_frank_wolfe(y_val, y_proba_val, y_test, y_proba_test, k, init_a, init_b): + print(f"input dtype={y_proba_val.dtype}") + if isinstance(y_proba_val, csr_matrix): + print( + f" csr_matrix nnz={y_proba_val.nnz}, shape={y_proba_val.shape}, sparsity={y_proba_val.nnz / y_proba_val.shape[0] / y_proba_val.shape[1]}" + ) rnd_clf, meta = find_optimal_randomized_classifier_using_frank_wolfe( y_val, y_proba_val, macro_fmeasure_on_conf_matrix, k, - grad_func="torch", return_meta=True, seed=2024, init_classifier=(init_a, init_b), + verbose=True, ) + print(f" time={meta['time']}s") y_pred = rnd_clf.predict(y_proba_test, seed=2024) assert type(y_pred) == type(y_proba_test) @@ -84,6 +90,9 @@ def test_frank_wolfe(generated_test_data): assert np_C == csr_C == torch_C # Compare with top_k - assert macro_fmeasure_on_conf_matrix( - np_C.tp, np_C.fp, np_C.fn, np_C.tn - ) > macro_fmeasure_on_conf_matrix(top_k_C.tp, top_k_C.fp, top_k_C.fn, top_k_C.tn) + fw_score = macro_fmeasure_on_conf_matrix(np_C.tp, np_C.fp, np_C.fn, np_C.tn) + top_k_score = macro_fmeasure_on_conf_matrix( + top_k_C.tp, top_k_C.fp, top_k_C.fn, top_k_C.tn + ) + print(f"fw_score={fw_score}, top_k_score={top_k_score}") + assert fw_score >= top_k_score diff --git a/tests/test_weighted_prediction.py b/tests/test_weighted_prediction.py index 1ef0c25..c483b21 100644 --- a/tests/test_weighted_prediction.py +++ b/tests/test_weighted_prediction.py @@ -6,12 +6,23 @@ from xcolumns.weighted_prediction import predict_weighted_per_instance -def _run_weighted_prediction(y_true, y_proba, k, a, b): - y_pred = predict_weighted_per_instance(y_proba, k, a=a, b=b) +def _run_and_test_weighted_prediction(y_true, y_proba, k, a, b): + print(f"input dtype={y_proba.dtype}") + if isinstance(y_proba, csr_matrix): + print( + f" csr_matrix nnz={y_proba.nnz}, shape={y_proba.shape}, sparsity={y_proba.nnz / y_proba.shape[0] / y_proba.shape[1]}" + ) + y_pred, meta = predict_weighted_per_instance(y_proba, k, a=a, b=b, return_meta=True) + print(f" 
time={meta['time']}s") + assert type(y_pred) == type(y_proba) + assert y_pred.dtype == y_proba.dtype assert (y_pred.sum(axis=1) == k).all() - return calculate_confusion_matrix(y_true, y_pred, normalize=False, skip_tn=False) + return ( + calculate_confusion_matrix(y_true, y_pred, normalize=False, skip_tn=False), + y_pred, + ) def test_weighted_prediction(generated_test_data): @@ -30,18 +41,27 @@ def test_weighted_prediction(generated_test_data): # Generate random weights a = np.random.rand(y_proba_train.shape[1]) - b = np.random.rand(y_proba_train.shape[1]) + # b = np.random.rand(y_proba_train.shape[1]) + b = np.zeros(y_proba_train.shape[1]) # Run numpy implementation - np_C = _run_weighted_prediction(y_test, y_proba_test, k, a, b) + y_proba_test = y_proba_test.astype(np.float64) + np_C_64, np_pred_64 = _run_and_test_weighted_prediction( + y_test, y_proba_test, k, a, b + ) + + y_proba_test = y_proba_test.astype(np.float32) + np_C_32, np_pred_32 = _run_and_test_weighted_prediction( + y_test, y_proba_test, k, a, b + ) # Run csr_matrix implementation - csr_C = _run_weighted_prediction( + csr_C, csr_pred = _run_and_test_weighted_prediction( csr_matrix(y_test), csr_matrix(y_proba_test), k, a, b ) # Run torch implementation - torch_C = _run_weighted_prediction( + torch_C, torch_pred = _run_and_test_weighted_prediction( torch.tensor(y_test), torch.tensor(y_proba_test), k, @@ -56,4 +76,4 @@ torch_C.tn = torch_C.tn.numpy() # Compare if all implementations are equal - assert np_C == csr_C == torch_C + assert np_C_64 == np_C_32 == csr_C == torch_C diff --git a/xcolumns/__init__.py b/xcolumns/__init__.py index f102a9c..3b93d0b 100644 --- a/xcolumns/__init__.py +++ b/xcolumns/__init__.py @@ -1 +1 @@ -__version__ = "0.0.1" +__version__ = "0.0.2" diff --git a/xcolumns/block_coordinate.py b/xcolumns/block_coordinate.py index f92b477..f472c9c 100644 --- a/xcolumns/block_coordinate.py +++ b/xcolumns/block_coordinate.py @@ -15,14 +15,14 @@ from .weighted_prediction import predict_top_k -def _get_utility_aggregation_func(utility_aggregation: str): - if utility_aggregation == "mean": +def _get_metric_aggregation_func(metric_aggregation: str): + if metric_aggregation == "mean": return np.mean - elif utility_aggregation == "sum": + elif metric_aggregation == "sum": return np.sum else: raise ValueError( - f"Unsupported utility aggregation function: {utility_aggregation}, must be either 'mean' or 'sum'" + f"Unsupported metric aggregation function: {metric_aggregation}, must be either 'mean' or 'sum'" ) @@ -52,36 +52,36 @@ def _get_initial_y_pred( def _calculate_utility( - bin_utility_func: Union[Callable, list[Callable]], - utility_aggregation_func: Callable, + bin_metric_func: Union[Callable, list[Callable]], + metric_aggregation_func: Callable, Etp: np.ndarray, Efp: np.ndarray, Efn: np.ndarray, Etn: np.ndarray, ) -> np.ndarray: - if callable(bin_utility_func): - bin_utilities = bin_utility_func(Etp, Efp, Efn, Etn) + if callable(bin_metric_func): + bin_utilities = bin_metric_func(Etp, Efp, Efn, Etn) else: bin_utilities = np.array( - [f(Etp[i], Efp[i], Efn[i], Etn[i]) for i, f in enumerate(bin_utility_func)] + [f(Etp[i], Efp[i], Efn[i], Etn[i]) for i, f in enumerate(bin_metric_func)] ) # Validate bin utilities here to omit unnecessary calculations later in _calculate_binary_gains if not isinstance(bin_utilities, np.ndarray): raise ValueError( - f"bin_utility_func must return np.ndarray, but returned {type(bin_utilities)}" + f"bin_metric_func must return 
np.ndarray, but returned {type(bin_utilities)}" ) if bin_utilities.shape != (Etp.shape[0],): raise ValueError( - f"bin_utility_func must return np.ndarray of shape {Etp.shape[0]}, but returned {bin_utilities.shape}" + f"bin_metric_func must return np.ndarray of shape {Etp.shape[0]}, but returned {bin_utilities.shape}" ) - return utility_aggregation_func(bin_utilities) + return metric_aggregation_func(bin_utilities) def _calculate_binary_gains( - bin_utility_func, + bin_metric_func, pos_Etp: np.ndarray, pos_Efp: np.ndarray, pos_Efn: np.ndarray, @@ -91,20 +91,20 @@ def _calculate_binary_gains( neg_Efn: np.ndarray, neg_Etn: np.ndarray, ) -> np.ndarray: - if callable(bin_utility_func): - pos_utility = bin_utility_func(pos_Etp, pos_Efp, pos_Efn, pos_Etn) - neg_utility = bin_utility_func(neg_Etp, neg_Efp, neg_Efn, neg_Etn) + if callable(bin_metric_func): + pos_utility = bin_metric_func(pos_Etp, pos_Efp, pos_Efn, pos_Etn) + neg_utility = bin_metric_func(neg_Etp, neg_Efp, neg_Efn, neg_Etn) else: pos_utility = np.array( [ f(pos_Etp[i], pos_Efp[i], pos_Efn[i], pos_Etn[i]) - for i, f in enumerate(bin_utility_func) + for i, f in enumerate(bin_metric_func) ] ) neg_utility = np.array( [ f(neg_Etp[i], neg_Efp[i], neg_Efn[i], neg_Etn[i]) - for i, f in enumerate(bin_utility_func) + for i, f in enumerate(bin_metric_func) ] ) @@ -120,7 +120,7 @@ def bc_with_0approx_step_np( Efn: np.ndarray, Etn: np.ndarray, k: int, - bin_utility_func: Union[Callable, list[Callable]], + bin_metric_func: Union[Callable, list[Callable]], greedy: bool = False, maximize: bool = True, only_pred: bool = False, @@ -149,7 +149,7 @@ neg_Etn = Etn + (1 - y_proba_i) gains = _calculate_binary_gains( - bin_utility_func, + bin_metric_func, pos_Etp / n, pos_Efp / n, Efn / n, @@ -191,7 +191,7 @@ def bc_with_0approx_step_csr( Efn: np.ndarray, Etn: np.ndarray, k: int, - bin_utility_func: Union[Callable, list[Callable]], + bin_metric_func: Union[Callable, list[Callable]], greedy: bool = False, maximize: bool = True, only_pred: bool = False, @@ -233,7 +233,7 @@ # Calculate gain and selection gains = _calculate_binary_gains( - bin_utility_func, + bin_metric_func, pos_Etpp, pos_Efpp, pos_Efn, @@ -262,20 +262,20 @@ def predict_using_bc_with_0approx( y_proba: Union[np.ndarray, csr_matrix], - bin_utility_func: Union[Callable, list[Callable]], + bin_metric_func: Union[Callable, list[Callable]], k: int, - utility_aggregation: str = "mean", # "mean" or "sum" + metric_aggregation: str = "mean", # "mean" or "sum" + maximize=True, tolerance: float = 1e-6, init_y_pred: Union[ str, np.ndarray, csr_matrix ] = "random", # "random", "topk", "greedy", np.ndarray or csr_matrix max_iter: int = 100, shuffle_order: bool = True, - maximize=True, skip_tn=False, - seed: int = None, - verbose: bool = False, return_meta: bool = False, + seed: Optional[int] = None, + verbose: bool = False, **kwargs, ) -> Union[np.ndarray, csr_matrix]: """ @@ -286,6 +286,11 @@ def predict_using_bc_with_0approx( However both algorithms are equivalent. 
""" + log_info( + f"Starting optimization of ETU utility metric block coordinate {'ascent' if maximize else 'descent'} algorithm ...", + verbose, + ) + # Initialize the meta data dictionary meta = {"utilities": [], "iters": 0, "time": time()} @@ -299,23 +304,21 @@ def predict_using_bc_with_0approx( else: raise ValueError("y_proba must be either np.ndarray or csr_matrix") - # y_proba is ndarray or csr_matrix n, m = y_proba.shape # Get aggregation function - utility_aggregation_func = _get_utility_aggregation_func(utility_aggregation) + metric_aggregation_func = _get_metric_aggregation_func(metric_aggregation) # Initialize the prediction matrix - print(f" Initializing starting prediction ...") + log_info(f" Initializing initial prediction ...", verbose) greedy = init_y_pred == "greedy" y_pred = _get_initial_y_pred(y_proba, init_y_pred, k, random_at_k_func) - print(y_pred.shape, y_proba.shape) # Initialize the instance order and set seed for shuffling rng = np.random.default_rng(seed) order = np.arange(n) for j in range(1, max_iter + 1): - print(f" Starting iteration {j} ...") + log_info(f" Starting iteration {j}/{max_iter} ...", verbose) if shuffle_order: rng.shuffle(order) @@ -328,14 +331,14 @@ def predict_using_bc_with_0approx( Efn = np.zeros(m, dtype=FLOAT_TYPE) Etn = np.zeros(m, dtype=FLOAT_TYPE) else: - print(" Calculating expected confusion matrix ...") + log_info(" Calculating expected confusion matrix ...", verbose) Etp, Efp, Efn, Etn = calculate_confusion_matrix( y_proba, y_pred, normalize=False, skip_tn=skip_tn ) old_utility = _calculate_utility( - bin_utility_func, - utility_aggregation_func, + bin_matric_func, + metric_aggregation_func, Etp / n, Efp / n, Efn / n, @@ -352,15 +355,15 @@ def predict_using_bc_with_0approx( Efn, Etn, k, - bin_utility_func, + bin_matric_func, greedy=greedy, maximize=maximize, skip_tn=skip_tn, ) new_utility = _calculate_utility( - bin_utility_func, - utility_aggregation_func, + bin_matric_func, + metric_aggregation_func, Etp / n, Efp / n, Efn / n, @@ -371,12 +374,14 @@ def predict_using_bc_with_0approx( meta["iters"] = j meta["utilities"].append(new_utility) - print( - f" Iteration {j} finished, expected utility: {old_utility} -> {new_utility}" + log_info( + f" Iteration {j}/{max_iter} finished, expected metric value: {old_utility} -> {new_utility}", + verbose, ) if abs(new_utility - old_utility) < tolerance: - print( - f" Stopping because improvement of expected utility is smaller than {tolerance}" + log_info( + f" Stopping because improvement of expected metric value is smaller than {tolerance}", + verbose, ) break @@ -500,9 +505,9 @@ def predict_optimizing_coverage_using_bc( ] = "random", # "random", "topk", "random", or csr_matrix max_iter: int = 100, shuffle_order: bool = True, + return_meta: bool = False, seed: int = None, verbose: bool = False, - return_meta: bool = False, **kwargs, ): """ @@ -528,7 +533,7 @@ def predict_optimizing_coverage_using_bc( raise ValueError("y_proba must be either np.ndarray or csr_matrix") # Initialize the prediction matrix - print(f" Initializing starting prediction ...") + log_info(f" Initializing starting prediction ...", verbose) greedy = init_y_pred == "greedy" y_pred = _get_initial_y_pred(y_proba, init_y_pred, k, random_at_k_func) @@ -538,7 +543,7 @@ def predict_optimizing_coverage_using_bc( rng = np.random.default_rng(seed) order = np.arange(n) for j in range(1, max_iter + 1): - print(f" Starting iteration {j} ...") + log_info(f" Starting iteration {j}/{max_iter} ...", verbose) if shuffle_order: 
rng.shuffle(order) @@ -564,8 +569,15 @@ meta["iters"] = j meta["utilities"].append(new_cov) - print(f" Iteration {j} finished, expected coverage: {old_cov} -> {new_cov}") + log_info( + f" Iteration {j}/{max_iter} finished, expected coverage: {old_cov} -> {new_cov}", + verbose, + ) if new_cov <= old_cov + tolerance: + log_info( + f" Stopping because improvement of expected coverage is smaller than {tolerance}", + verbose, + ) break if return_meta: diff --git a/xcolumns/confusion_matrix.py b/xcolumns/confusion_matrix.py index a5f7bc5..80734dc 100644 --- a/xcolumns/confusion_matrix.py +++ b/xcolumns/confusion_matrix.py @@ -1,4 +1,4 @@ -from typing import Union, Optional +from typing import Optional, Union import numpy as np from scipy.sparse import csr_matrix @@ -12,25 +12,53 @@ # Confusion matrix class ######################################################################################## + class ConfusionMatrix: """ Class representing a confusion matrix. When unpacked returns (tp, fp, fn, tn) (in this order). """ - def __init__(self, tp: Union[Number, DenseMatrix], fp: Union[Number, DenseMatrix], fn: Union[Number, DenseMatrix], tn: Union[Number, DenseMatrix]): + + def __init__( + self, + tp: Union[Number, DenseMatrix], + fp: Union[Number, DenseMatrix], + fn: Union[Number, DenseMatrix], + tn: Union[Number, DenseMatrix], + ): self.tp = tp self.fp = fp self.fn = fn self.tn = tn - # For compatibility with sklearn confusion matrix + # For compatibility with methods that expect a tuple def __iter__(self): yield self.tp yield self.fp yield self.fn yield self.tn + def __eq__(self, other: object) -> bool: + if not isinstance(other, ConfusionMatrix): + return False + + if isinstance(self.tp, Number): + return ( + self.tp == other.tp + and self.fp == other.fp + and self.fn == other.fn + and self.tn == other.tn + ) + elif isinstance(self.tp, DenseMatrix): + return ( + (self.tp == other.tp).all() + and (self.fp == other.fp).all() + and (self.fn == other.fn).all() + and (self.tn == other.tn).all() + ) + return False + def __add__(self, other: "ConfusionMatrix") -> "ConfusionMatrix": return ConfusionMatrix( self.tp + other.tp, @@ -38,14 +66,14 @@ def __add__(self, other: "ConfusionMatrix") -> "ConfusionMatrix": self.fn + other.fn, self.tn + other.tn, ) - + def __iadd__(self, other: "ConfusionMatrix") -> "ConfusionMatrix": self.tp += other.tp self.fp += other.fp self.fn += other.fn self.tn += other.tn return self - + def __sub__(self, other: "ConfusionMatrix") -> "ConfusionMatrix": return ConfusionMatrix( self.tp - other.tp, @@ -53,14 +81,14 @@ def __sub__(self, other: "ConfusionMatrix") -> "ConfusionMatrix": self.fn - other.fn, self.tn - other.tn, ) - + def __isub__(self, other: "ConfusionMatrix") -> "ConfusionMatrix": self.tp -= other.tp self.fp -= other.fp self.fn -= other.fn self.tn -= other.tn return self - + def __mul__(self, other: Union[Number, DenseMatrix]) -> "ConfusionMatrix": return ConfusionMatrix( self.tp * other, @@ -68,14 +96,14 @@ def __mul__(self, other: Union[Number, DenseMatrix]) -> "ConfusionMatrix": self.fn * other, self.tn * other, ) - + def __imul__(self, other: Union[Number, DenseMatrix]) -> "ConfusionMatrix": self.tp *= other self.fp *= other self.fn *= other self.tn *= other return self - + def __truediv__(self, other: Union[Number, DenseMatrix]) -> "ConfusionMatrix": return ConfusionMatrix( self.tp / other, @@ -83,14 +111,14 @@ def __truediv__(self, other: Union[Number, DenseMatrix]) -> "ConfusionMatrix": self.fn / other, self.tn / other, ) - + def 
__itruediv__(self, other: Union[Number, DenseMatrix]) -> "ConfusionMatrix": self.tp /= other self.fp /= other self.fn /= other self.tn /= other return self - + def __floordiv__(self, other: Union[Number, DenseMatrix]) -> "ConfusionMatrix": return ConfusionMatrix( self.tp // other, @@ -98,14 +126,13 @@ def __floordiv__(self, other: Union[Number, DenseMatrix]) -> "ConfusionMatrix": self.fn // other, self.tn // other, ) - + def __ifloordiv__(self, other: Union[Number, DenseMatrix]) -> "ConfusionMatrix": self.tp //= other self.fp //= other self.fn //= other self.tn //= other return self - ######################################################################################## @@ -196,7 +223,7 @@ def _calculate_conf_mat_entry( val = func(y_true, y_pred) if normalize: - val /= y_true.shape[0] + val = val / y_true.shape[0] return val @@ -258,7 +285,7 @@ def calculate_confusion_matrix( tn = tp.copy() tn[:] = -1 else: - tn = -tp - fp - fn + (1 if normalize else n) + tn = -tp - fp - fn + (1.0 if normalize else n) return ConfusionMatrix(tp, fp, fn, tn) @@ -296,4 +323,4 @@ def update_unnormalized_confusion_matrix( else: raise ValueError( "y_true and y_pred must be both np.ndarray, both torch.Tensor, or both csr_matrix" - ) \ No newline at end of file + ) diff --git a/xcolumns/frank_wolfe.py b/xcolumns/frank_wolfe.py index 7415179..ad30719 100644 --- a/xcolumns/frank_wolfe.py +++ b/xcolumns/frank_wolfe.py @@ -1,9 +1,9 @@ +import logging from time import time -from typing import Callable, Union +from typing import Callable, Optional, Tuple, Union +import autograd import autograd.numpy as np -import torch -from autograd import grad from scipy.sparse import csr_matrix from .confusion_matrix import calculate_confusion_matrix @@ -20,30 +20,84 @@ def _get_grad_as_numpy(t): - return np.zeros(t.shape, dtype=FLOAT_TYPE) + return np.zeros(t.shape, dtype=DefaultDataDType) -def utility_func_with_gradient_torch(utility_func, tp, fp, fn, tn): - if not isinstance(tp, torch.Tensor) or not tp.requires_grad: - tp = torch.tensor(tp, requires_grad=True, dtype=TORCH_FLOAT_TYPE) - fp = torch.tensor(fp, requires_grad=True, dtype=TORCH_FLOAT_TYPE) - fn = torch.tensor(fn, requires_grad=True, dtype=TORCH_FLOAT_TYPE) - tn = torch.tensor(tn, requires_grad=True, dtype=TORCH_FLOAT_TYPE) - utility = utility_func(tp, fp, fn, tn) - utility.backward() - return ( - float(utility), - _get_grad_as_numpy(tp), - _get_grad_as_numpy(fp), - _get_grad_as_numpy(fn), - _get_grad_as_numpy(tn), - ) +_torch_available = False +try: + import torch + + def metric_func_with_gradient_torch_old(metric_func, tp, fp, fn, tn): + if not isinstance(tp, torch.Tensor) or not tp.requires_grad: + tp = torch.tensor(tp, requires_grad=True, dtype=TORCH_FLOAT_TYPE) + fp = torch.tensor(fp, requires_grad=True, dtype=TORCH_FLOAT_TYPE) + fn = torch.tensor(fn, requires_grad=True, dtype=TORCH_FLOAT_TYPE) + tn = torch.tensor(tn, requires_grad=True, dtype=TORCH_FLOAT_TYPE) + utility = metric_func(tp, fp, fn, tn) + utility.backward() + return ( + float(utility), + _get_grad_as_numpy(tp), + _get_grad_as_numpy(fp), + _get_grad_as_numpy(fn), + _get_grad_as_numpy(tn), + ) + + def metric_func_with_gradient_torch(metric_func, tp, fp, fn, tn): + # tp = torch.tensor(tp, requires_grad=True) + # fp = torch.tensor(fp, requires_grad=True) + # fn = torch.tensor(fn, requires_grad=True) + # tn = torch.tensor(tn, requires_grad=True) + tp.requires_grad_(True) + fp.requires_grad_(True) + fn.requires_grad_(True) + tn.requires_grad_(True) + value = metric_func(tp, fp, fn, tn) + tp_grad, fp_grad, fn_grad, tn_grad = torch.autograd.grad( + value, (tp, fp, fn, 
tn), materialize_grads=True, allow_unused=True + ) + return value, tp_grad, fp_grad, fn_grad, tn_grad + + def _predict_using_randomized_classifier_torch( + y_proba: torch.Tensor, + k: int, + classifiers_a: torch.Tensor, + classifiers_b: torch.Tensor, + classifiers_proba: torch.Tensor, + dtype: Optional[torch.dtype] = None, + seed: Optional[int] = None, + ): + rng = np.random.default_rng(seed) + + n, m = y_proba.shape + c = classifiers_proba.shape[0] + classifiers_range = np.arange(c) + y_pred = torch.zeros( + y_proba.shape, + dtype=y_proba.dtype if dtype is None else dtype, + device=y_proba.device, + ) + for i in range(n): + c_i = rng.choice(classifiers_range, p=classifiers_proba) + gains = y_proba[i] * classifiers_a[c_i] + classifiers_b[c_i] + + if k > 0: + _, top_k = torch.topk(gains, k) + y_pred[i, top_k] = 1 + else: + y_pred[i, gains >= 0] = 1 + + return y_pred + + _torch_available = True +except ImportError: + pass -def utility_func_with_gradient_autograd(utility_func, tp, fp, fn, tn): - grad_func = grad(utility_func, argnum=[0, 1, 2, 3]) - return float(utility_func(tp, fp, fn, tn)), *grad_func(tp, fp, fn, tn) +def metric_func_with_gradient_autograd(metric_func, tp, fp, fn, tn): + grad_func = autograd.grad(metric_func, argnum=[0, 1, 2, 3]) + return float(metric_func(tp, fp, fn, tn)), *grad_func(tp, fp, fn, tn) def _find_best_alpha( - utility_func, + metric_func, tp, fp, fn, @@ -52,20 +106,18 @@ def _find_best_alpha( fp_i, fn_i, tn_i, - search_algo="lin", + search_algo="uniform", eps=0.001, - lin_search_step=0.001, -): - conf_mat_comb = lambda alpha: utility_func( + uniform_search_step=0.001, +) -> Tuple[float, float]: + conf_mat_comb = lambda alpha: metric_func( (1 - alpha) * tp + alpha * tp_i, (1 - alpha) * fp + alpha * fp_i, (1 - alpha) * fn + alpha * fn_i, (1 - alpha) * tn + alpha * tn_i, ) - if search_algo == "lin": - return lin_search(0, 1, lin_search_step, conf_mat_comb) - elif search_algo == "bin": - return bin_search(0, 1, eps, conf_mat_comb) + if search_algo == "uniform": + return uniform_search(0, 1, uniform_search_step, conf_mat_comb) elif search_algo == "ternary": return ternary_search(0, 1, eps, conf_mat_comb) else: @@ -75,68 +127,98 @@ def _find_best_alpha( def find_optimal_randomized_classifier_using_frank_wolfe( y_true: Matrix, y_proba: Matrix, - utility_func: Callable, + metric_func: Callable, k: int, max_iters: int = 100, init_classifier: Union[ str, Tuple[DenseMatrix, DenseMatrix] - ] = "random", # or "random" + ] = "random", # or "default", "topk" + maximize=True, search_for_best_alpha: bool = True, - alpha_search_algo: str = "lin", + alpha_search_algo: str = "uniform", # or "ternary" alpha_eps: float = 0.001, - alpha_lin_search_step: float = 0.001, + alpha_uniform_search_step: float = 0.001, skip_tn=False, seed=None, - verbose: bool = True, + verbose: bool = False, return_meta: bool = False, - grad_func="torch", - **kwargs, ): - log = print - if not verbose: - log = lambda *args, **kwargs: None + log_info( + "Starting searching for optimal randomized classifier using Frank-Wolfe algorithm ...", + verbose, + ) - if type(y_true) != type(y_proba): + # Validate y_true and y_proba + if type(y_true) != type(y_proba) and isinstance(y_true, Matrix): raise ValueError( - f"y_true and y_proba have unsupported combination of types {type(y_true)}, {type(y_proba)}, should be both np.ndarray, both torch.Tensor, or both csr_matrix" + f"y_true and y_proba have unsupported combination of types {type(y_true)} and {type(y_proba)}, should be both np.ndarray, both torch.Tensor, or both 
csr_matrix" ) - log("Starting Frank-Wolfe algorithm") - n, m = y_proba.shape + if y_true.shape != y_proba.shape: + raise ValueError( + f"y_true and y_proba must have the same shape, got {y_true.shape} and {y_proba.shape}" + ) - if grad_func == "torch": - utility_func_with_gradient = utility_func_with_gradient_torch - elif grad_func == "autograd": - utility_func_with_gradient = utility_func_with_gradient_autograd - else: - raise ValueError(f"Unsupported grad_func: {grad_func}") + n, m = y_proba.shape + log_info(f" Initializing {init_classifier} classifier ...", verbose) + # Initialize the classifiers matrix rng = np.random.default_rng(seed) - classifiers_a = np.zeros((max_iters, m), dtype=FLOAT_TYPE) - classifiers_b = np.zeros((max_iters, m), dtype=FLOAT_TYPE) - classifiers_proba = np.ones(max_iters, dtype=FLOAT_TYPE) + classifiers_a = np.zeros((max_iters + 1, m), dtype=DefaultDataDType) + classifiers_b = np.zeros((max_iters + 1, m), dtype=DefaultDataDType) + classifiers_proba = np.ones(max_iters + 1, dtype=DefaultDataDType) - log(f" Initializing initial {init_classifier} classifier ...") if init_classifier in ["default", "topk"]: - classifiers_a[0] = np.ones(m, dtype=FLOAT_TYPE) - classifiers_b[0] = np.full(m, -0.5, dtype=FLOAT_TYPE) + classifiers_a[0] = np.ones(m, dtype=DefaultDataDType) + classifiers_b[0] = np.full(m, -0.5, dtype=DefaultDataDType) elif init_classifier == "random": classifiers_a[0] = rng.random(m) classifiers_b[0] = rng.random(m) - 0.5 + elif ( + isinstance(init_classifier, (tuple, list)) + and len(init_classifier) == 2 + and isinstance(init_classifier[0], DenseMatrix) + and isinstance(init_classifier[1], DenseMatrix) + and init_classifier[0].shape == (m,) + and init_classifier[1].shape == (m,) + ): + if _torch_available and isinstance(init_classifier[0], torch.Tensor): + classifiers_a[0] = init_classifier[0].numpy() + else: + classifiers_a[0] = init_classifier[0] + + if _torch_available and isinstance(init_classifier[1], torch.Tensor): + classifiers_b[0] = init_classifier[1].numpy() + else: + classifiers_b[0] = init_classifier[1] else: - raise ValueError(f"Unsupported type of init_classifier: {init_classifier}") + raise ValueError( + f"Unsupported type of init_classifier, it should be 'default', 'topk', 'random', or a tuple of two np.ndarray or torch.Tensor of shape (y_true.shape[1], )" + ) + + # Adjust types according to the type of y_true and y_proba + if isinstance(y_true, (np.ndarray, csr_matrix)): + metric_func_with_gradient = metric_func_with_gradient_autograd + elif _torch_available and isinstance(y_true, torch.Tensor): + metric_func_with_gradient = metric_func_with_gradient_torch + classifiers_a = torch.tensor( + classifiers_a, dtype=y_proba.dtype, device=y_proba.device + ) + classifiers_b = torch.tensor( + classifiers_b, dtype=y_proba.dtype, device=y_proba.device + ) + classifiers_proba = torch.tensor( + classifiers_proba, dtype=y_proba.dtype, device=y_proba.device + ) + y_pred_i = predict_weighted_per_instance( y_proba, k, th=0.0, a=classifiers_a[0], b=classifiers_b[0] ) - log( - f" y_true: {y_true.shape}, y_pred: {y_pred_i.shape}, y_proba: {y_proba.shape}" - ) - tp, fp, fn, tn = calculate_confusion_matrix( y_true, y_pred_i, normalize=True, skip_tn=skip_tn ) - utility_i = float(utility_func(tp, fp, fn, tn)) + utility_i = metric_func(tp, fp, fn, tn) if return_meta: meta = { @@ -148,25 +230,29 @@ find_optimal_randomized_classifier_using_frank_wolfe( meta["utilities"].append(utility_i) meta["classifiers_utilities"].append(utility_i) - for i in range(1, max_iters): 
- log(f" Starting iteration {i} ...") - old_utility, Gtp, Gfp, Gfn, Gtn = utility_func_with_gradient( - utility_func, tp, fp, fn, tn + for i in range(1, max_iters + 1): + log_info(f" Starting iteration {i}/{max_iters} ...", verbose) + old_utility, Gtp, Gfp, Gfn, Gtn = metric_func_with_gradient( + metric_func, tp, fp, fn, tn ) classifiers_a[i] = Gtp - Gfp - Gfn + Gtn classifiers_b[i] = Gfp - Gtn + if not maximize: + classifiers_a[i] *= -1 + classifiers_b[i] *= -1 + y_pred_i = predict_weighted_per_instance( y_proba, k, th=0.0, a=classifiers_a[i], b=classifiers_b[i] ) tp_i, fp_i, fn_i, tn_i = calculate_confusion_matrix( y_true, y_pred_i, normalize=True, skip_tn=skip_tn ) - utility_i = float(utility_func(tp_i, fp_i, fn_i, tn_i)) + utility_i = metric_func(tp_i, fp_i, fn_i, tn_i) if search_for_best_alpha: alpha, _ = _find_best_alpha( - utility_func, + metric_func, tp, fp, fn, @@ -177,7 +263,7 @@ def find_optimal_randomized_classifier_using_frank_wolfe( tn_i, search_algo=alpha_search_algo, eps=alpha_eps, - lin_search_step=alpha_lin_search_step, + uniform_search_step=alpha_uniform_search_step, ) else: alpha = 2 / (i + 1) @@ -189,7 +275,7 @@ def find_optimal_randomized_classifier_using_frank_wolfe( fn = (1 - alpha) * fn + alpha * fn_i tn = (1 - alpha) * tn + alpha * tn_i - new_utility = float(utility_func(tp, fp, fn, tn)) + new_utility = metric_func(tp, fp, fn, tn) if return_meta: meta["alphas"].append(alpha) @@ -197,12 +283,13 @@ def find_optimal_randomized_classifier_using_frank_wolfe( meta["utilities"].append(new_utility) meta["iters"] = i - log( - f" Iteration {i} finished, alpha: {alpha}, utility: {old_utility} -> {new_utility}" + log_info( + f" Iteration {i}/{max_iters} finished, alpha: {alpha}, utility: {old_utility} -> {new_utility}", + verbose, ) if alpha < alpha_eps: - print(f" Stopping because alpha is smaller than {alpha_eps}") + log_info(f" Stopping because alpha is smaller than {alpha_eps}", verbose) # Truncate unused classifiers classifiers_a = classifiers_a[:i] classifiers_b = classifiers_b[:i] @@ -229,26 +316,26 @@ def _predict_using_randomized_classifier_np( classifiers_a: np.ndarray, classifiers_b: np.ndarray, classifiers_proba: np.ndarray, + dtype: Optional[np.dtype] = None, seed: Optional[int] = None, ): - if seed is not None: - np.random.seed(seed) + rng = np.random.default_rng(seed) n, m = y_proba.shape c = classifiers_proba.shape[0] classifiers_range = np.arange(c) - result = np.zeros(y_proba.shape, dtype=FLOAT_TYPE) + y_pred = np.zeros(y_proba.shape, dtype=y_proba.dtype if dtype is None else dtype) for i in range(n): - c_i = np.random.choice(classifiers_range, p=classifiers_proba) + c_i = rng.choice(classifiers_range, p=classifiers_proba) gains = y_proba[i] * classifiers_a[c_i] + classifiers_b[c_i] if k > 0: top_k = np.argpartition(-gains, k)[:k] - result[i, top_k] = 1.0 + y_pred[i, top_k] = 1.0 else: - result[i, gains > 0] = 1.0 + y_pred[i, gains > 0] = 1.0 - return result + return y_pred def _predict_using_randomized_classifier_csr( @@ -257,24 +344,30 @@ def _predict_using_randomized_classifier_csr( classifiers_a: np.ndarray, classifiers_b: np.ndarray, classifiers_proba: np.ndarray, + dtype: Optional[np.dtype] = None, seed: Optional[int] = None, ): - if seed is not None: - np.random.seed(seed) + rng = np.random.default_rng(seed) n, m = y_proba.shape c = classifiers_proba.shape[0] classifiers_range = np.arange(c) initial_row_size = k if k > 0 else 10 - y_pred_data = np.ones(n * initial_row_size, dtype=FLOAT_TYPE) - y_pred_indices = np.zeros(n * initial_row_size, 
dtype=IND_TYPE) - y_pred_indptr = np.arange(n + 1, dtype=IND_TYPE) * initial_row_size + y_pred_data = np.ones( + n * initial_row_size, dtype=y_proba.data.dtype if dtype is None else dtype + ) + y_pred_indices = np.zeros(n * initial_row_size, dtype=y_proba.indices.dtype) + y_pred_indptr = np.arange(n + 1, dtype=y_proba.indptr.dtype) * initial_row_size # TODO: Can be further optimized in numba for i in range(n): - c_i = np.random.choice(classifiers_range, p=classifiers_proba) - y_pred_data, y_pred_indices, y_pred_indptr = numba_predict_weighted_per_instance_csr_step( + c_i = rng.choice(classifiers_range, p=classifiers_proba) + ( + y_pred_data, + y_pred_indices, + y_pred_indptr, + ) = numba_predict_weighted_per_instance_csr_step( y_pred_data, y_pred_indices, y_pred_indptr, @@ -288,7 +381,7 @@ def _predict_using_randomized_classifier_csr( classifiers_b[c_i], ) - y_pred_data = np.ones(y_pred_indices.size, dtype=FLOAT_TYPE) + # y_pred_data = np.ones(y_pred_indices.size, dtype=FLOAT_TYPE) return csr_matrix((y_pred_data, y_pred_indices, y_pred_indptr), shape=(n, m)) @@ -299,8 +392,9 @@ def predict_using_randomized_classifier( classifiers_a: DenseMatrix, classifiers_b: DenseMatrix, classifiers_proba: DenseMatrix, + dtype: Optional[DType] = None, seed: Optional[int] = None, -): +) -> Matrix: # Validate arguments # y_proba @@ -343,13 +437,37 @@ def predict_using_randomized_classifier( ) if isinstance(y_proba, np.ndarray): - return _predict_using_randomized_classifier_np( - y_proba, k, classifiers_a, classifiers_b, classifiers_proba, seed=seed + y_pred = _predict_using_randomized_classifier_np( + y_proba, + k, + classifiers_a, + classifiers_b, + classifiers_proba, + dtype=dtype, + seed=seed, ) elif isinstance(y_proba, csr_matrix): - return _predict_using_randomized_classifier_csr( - y_proba, k, classifiers_a, classifiers_b, classifiers_proba, seed=seed + y_pred = _predict_using_randomized_classifier_csr( + y_proba, + k, + classifiers_a, + classifiers_b, + classifiers_proba, + dtype=dtype, + seed=seed, ) + elif _torch_available and isinstance(y_proba, torch.Tensor): + y_pred = _predict_using_randomized_classifier_torch( + y_proba, + k, + classifiers_a, + classifiers_b, + classifiers_proba, + dtype=dtype, + seed=seed, + ) + + return y_pred class RandomizedWeightedClassifier: diff --git a/xcolumns/numba_csr_functions.py b/xcolumns/numba_csr_functions.py index d78caa3..c386838 100644 --- a/xcolumns/numba_csr_functions.py +++ b/xcolumns/numba_csr_functions.py @@ -9,11 +9,11 @@ @njit(cache=True) -def numba_first_k(n: int, k: int): - y_pred_data = np.ones(n * k, dtype=FLOAT_TYPE) - y_pred_indices = np.zeros(n * k, dtype=IND_TYPE) - y_pred_indptr = np.zeros(n + 1, dtype=IND_TYPE) - first_k = np.arange(k, dtype=IND_TYPE) +def numba_first_k(n: int, k: int) -> CSRMatrixAsTuple: + y_pred_data = np.ones(n * k, dtype=DefaultDataDType) + y_pred_indices = np.zeros(n * k, dtype=DefaultIndDType) + y_pred_indptr = np.zeros(n + 1, dtype=DefaultIndDType) + first_k = np.arange(k, dtype=DefaultIndDType) for i in range(n): y_pred_indices[i * k : (i + 1) * k] = first_k y_pred_indptr[i + 1] = y_pred_indptr[i] + k @@ -28,7 +28,7 @@ def numba_random_at_k_from( m: int, k: int, seed: int = None, -): +) -> CSRMatrixAsTuple: """ Selects k random labels for each instance. 
""" @@ -36,23 +36,18 @@ def numba_random_at_k_from( if seed is not None: np.random.seed(seed) - y_pred_data = np.ones(n * k, dtype=FLOAT_TYPE) - y_pred_indices = np.zeros(n * k, dtype=IND_TYPE) - y_pred_indptr = np.zeros(n + 1, dtype=IND_TYPE) - labels_range = np.arange(m, dtype=IND_TYPE) + y_pred_data = np.ones(n * k, dtype=DefaultDataDType) + y_pred_indices = np.zeros(n * k, dtype=y_proba_indices) + y_pred_indptr = np.zeros(n + 1, dtype=y_proba_indptr) + labels_range = np.arange(m, dtype=y_proba_indices) for i in range(n): row_indices = y_proba_indices[y_proba_indptr[i] : y_proba_indptr[i + 1]] if row_indices.size >= k: - # y_pred_indices[i * k : (i + 1) * k] = np.random.choice( - # row_indices, k, replace=False - # ) + # This is faster then above np.random.choice(..., replace=False) y_pred_indices[i * k : (i + 1) * k] = numba_fast_random_choice( row_indices, k ) else: - # y_pred_indices[i * k : (i + 1) * k] = np.random.choice( - # labels_range, k, replace=False - # ) y_pred_indices[i * k : (i + 1) * k] = numba_fast_random_choice( labels_range, k ) @@ -62,14 +57,14 @@ def numba_random_at_k_from( @njit(cache=True) -def numba_fast_random_choice(array, k=-1): +def numba_fast_random_choice(array, k=-1) -> np.ndarray: """ Selects k random elements from array. """ n = array.size if k < 0: k = array.size - index = np.arange(n, dtype=IND_TYPE) + index = np.arange(n, dtype=DefaultIndDType) for i in range(k): j = random.randint(i, n - 1) index[i], index[j] = index[j], index[i] @@ -77,7 +72,7 @@ def numba_fast_random_choice(array, k=-1): @njit(cache=True) -def numba_random_at_k(n: int, m: int, k: int, seed: int = None): +def numba_random_at_k(n: int, m: int, k: int, seed: int = None) -> CSRMatrixAsTuple: """ Selects k random labels for each instance. """ @@ -86,14 +81,11 @@ def numba_random_at_k(n: int, m: int, k: int, seed: int = None): # np.random.seed(seed) np.random.choice seems to be quite slow here random.seed(seed) - y_pred_data = np.ones(n * k, dtype=FLOAT_TYPE) - y_pred_indices = np.zeros(n * k, dtype=IND_TYPE) - y_pred_indptr = np.zeros(n + 1, dtype=IND_TYPE) - labels_range = np.arange(m, dtype=IND_TYPE) + y_pred_data = np.ones(n * k, dtype=DefaultDataDType) + y_pred_indices = np.zeros(n * k, dtype=DefaultIndDType) + y_pred_indptr = np.zeros(n + 1, dtype=DefaultIndDType) + labels_range = np.arange(m, dtype=DefaultIndDType) for i in range(n): - # y_pred_indices[i * k : (i + 1) * k] = np.random.choice( - # labels_range, k, replace=False - # ) y_pred_indices[i * k : (i + 1) * k] = numba_fast_random_choice(labels_range, k) y_pred_indptr[i + 1] = y_pred_indptr[i] + k @@ -111,8 +103,8 @@ def numba_csr_vec_mul_vec( """ i = j = k = 0 new_data_size = min(a_data.size, b_data.size) - new_data = np.zeros(new_data_size, dtype=FLOAT_TYPE) - new_indices = np.zeros(new_data_size, dtype=IND_TYPE) + new_data = np.zeros(new_data_size, dtype=a_data.dtype) + new_indices = np.zeros(new_data_size, dtype=a_indices.dtype) while i < a_indices.size and j < b_indices.size: # print(i, j, k, a_indices[i], b_indices[j], a_indices.size, b_indices.size) if a_indices[i] < b_indices[j]: @@ -144,7 +136,7 @@ def numba_calculate_sum_0_csr_mat_mul_mat( Gives the same result as a.multiply(b).sum(axis=0) where a and b are sparse vectors. Requires a and b to have sorted y_proba_indices (in ascending order). 
""" - result = np.zeros(m, dtype=FLOAT_TYPE) + result = np.zeros(m, dtype=a_data.dtype) for i in range(n): a_start, a_end = a_indptr[i], a_indptr[i + 1] b_start, b_end = b_indptr[i], b_indptr[i + 1] @@ -172,8 +164,8 @@ def numba_csr_vec_mul_ones_minus_vec( """ i = j = k = 0 new_data_size = a_data.size + b_data.size - new_data = np.zeros(new_data_size, dtype=FLOAT_TYPE) - new_indices = np.zeros(new_data_size, dtype=IND_TYPE) + new_data = np.zeros(new_data_size, dtype=a_data.dtype) + new_indices = np.zeros(new_data_size, dtype=a_indices.dtype) while i < a_indices.size: # print(i, j, k, a_indices[i], b_indices[j], a_indices.size, b_indices.size) if j >= b_indices.size or a_indices[i] < b_indices[j]: @@ -209,7 +201,7 @@ def numba_calculate_sum_0_csr_mat_mul_ones_minus_mat( Gives the same result as a.multiply(ones - b).sum(axis=0) where a and b are sparse matrices but makes it more efficient. Requires a and b to have sorted y_proba_indices (in ascending order). """ - result = np.zeros(m, dtype=FLOAT_TYPE) + result = np.zeros(m, dtype=a_data.dtype) for i in range(n): a_start, a_end = a_indptr[i], a_indptr[i + 1] b_start, b_end = b_indptr[i], b_indptr[i + 1] @@ -236,7 +228,7 @@ def numba_calculate_prod_0_csr_mat_mul_ones_minus_mat( Gives the same result as a.multiply(ones - b).prod(axis=0) where a and b are sparse matrices but makes it more efficient. Requires a and b to have sorted y_proba_indices (in ascending order). """ - result = np.ones(m, dtype=FLOAT_TYPE) + result = np.ones(m, dtype=a_data.dtype) for i in range(n): a_start, a_end = a_indptr[i], a_indptr[i + 1] b_start, b_end = b_indptr[i], b_indptr[i + 1] @@ -331,7 +323,7 @@ def numba_argtopk_csr(y_proba_data, y_proba_indices, k) -> np.ndarray: @njit(cache=True) def numba_resize(arr, new_size, fill) -> np.ndarray: - new_arr = np.zeros(new_size, arr.dtype) + new_arr = np.zeros(new_size, dtype=arr.dtype) new_arr[: arr.size] = arr if fill != 0: new_arr[arr.size :] = fill @@ -349,7 +341,7 @@ def numba_set_gains_csr( k, th, is_insert=True, -) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: +) -> CSRMatrixAsTuple: """ Sets top k gains or gains > th for the i-th instance in the y_pred csr matrix. 
""" @@ -412,7 +404,7 @@ def numba_predict_weighted_per_instance_csr_step( a: Optional[np.ndarray] = None, b: Optional[np.ndarray] = None, is_insert=True, -) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: +) -> CSRMatrixAsTuple: gains_data = y_proba_data[y_proba_indptr[i] : y_proba_indptr[i + 1]] gains_indices = y_proba_indices[y_proba_indptr[i] : y_proba_indptr[i + 1]] @@ -444,12 +436,12 @@ def numba_predict_weighted_per_instance_csr( th: float = 0, a: Optional[np.ndarray] = None, b: Optional[np.ndarray] = None, -) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: +) -> CSRMatrixAsTuple: n = y_proba_indptr.size - 1 initial_row_size = k if k > 0 else 10 - y_pred_data = np.ones(n * initial_row_size, dtype=FLOAT_TYPE) - y_pred_indices = np.zeros(n * initial_row_size, dtype=IND_TYPE) - y_pred_indptr = np.arange(n + 1, dtype=IND_TYPE) * initial_row_size + y_pred_data = np.ones(n * initial_row_size, dtype=y_proba_data.dtype) + y_pred_indices = np.zeros(n * initial_row_size, dtype=y_proba_indices.dtype) + y_pred_indptr = np.arange(n + 1, dtype=y_proba_indptr.dtype) * initial_row_size # This can be done in parallel, but Numba parallelism seems to not work well here for i in range(n): @@ -474,7 +466,6 @@ def numba_predict_weighted_per_instance_csr( y_pred_indices = y_pred_indices[: y_pred_indptr[-1]] y_pred_data = y_pred_data[: y_pred_indptr[-1]] - # y_pred_data = np.ones(y_pred_indices.size, dtype=FLOAT_TYPE) return y_pred_data, y_pred_indices, y_pred_indptr @@ -488,13 +479,13 @@ def numba_predict_macro_balanced_accuracy( m: int, k: int, marginals: np.ndarray, -) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: +) -> CSRMatrixAsTuple: """ Predicts k labels for each instance according to the optimal strategy for macro-balanced accuracy. """ - y_pred_data = np.ones(n * k, dtype=FLOAT_TYPE) - y_pred_indices = np.zeros(n * k, dtype=IND_TYPE) - y_pred_indptr = np.zeros(n + 1, dtype=IND_TYPE) + y_pred_data = np.ones(n * k, dtype=y_proba_data.dtype) + y_pred_indices = np.zeros(n * k, dtype=y_proba_indices.dtype) + y_pred_indptr = np.zeros(n + 1, dtype=y_proba_indptr.dtype) for i in range(n): row_data = y_proba_data[y_proba_indptr[i] : y_proba_indptr[i + 1]] diff --git a/xcolumns/types.py b/xcolumns/types.py index 287c8dc..3166543 100644 --- a/xcolumns/types.py +++ b/xcolumns/types.py @@ -1,24 +1,25 @@ +from typing import Tuple, Union + import numpy as np from scipy.sparse import csr_matrix -from typing import Union + DType = np.dtype Number = Union[int, float, np.number] DenseMatrix = np.ndarray Matrix = Union[np.ndarray, csr_matrix] +CSRMatrixAsTuple = Tuple[np.ndarray, np.ndarray, np.ndarray] +DefaultIndDType = np.int32 +DefaultDataDType = np.float32 + # Add torch.Tensor and torch.dtype to matrix types if torch is available try: import torch + DTypes = Union[DType, torch.dtype] DenseMatrix = Union[DenseMatrix, torch.Tensor] Matrix = Union[Matrix, torch.Tensor] TORCH_FLOAT_TYPE = torch.float32 except ImportError: pass - -# TODO: remove these hardcoded types -IND_TYPE = np.int32 # type of indices in CSR matrix -INT_TYPE = np.int32 -FLOAT_TYPE = np.float32 - diff --git a/xcolumns/utils.py b/xcolumns/utils.py index b8871a0..0cc69f9 100644 --- a/xcolumns/utils.py +++ b/xcolumns/utils.py @@ -1,3 +1,4 @@ +import logging from typing import Callable, List, Tuple, Union import numpy as np @@ -7,6 +8,35 @@ from .types import * +######################################################################################## +# Logger +######################################################################################## + + 
+logger = logging.getLogger("xcolumns") + + +def log(msg: str, verbose: bool, level: int = logging.INFO): + if verbose: + logger.log(level, msg) + + +def log_info(msg: str, verbose: bool): + log(msg, verbose, level=logging.INFO) + + +def log_debug(msg: str, verbose: bool): + log(msg, verbose, level=logging.DEBUG) + + +def log_warning(msg: str, verbose: bool): + log(msg, verbose, level=logging.WARNING) + + +def log_error(msg: str, verbose: bool): + log(msg, verbose, level=logging.ERROR) + + ######################################################################################## # Functions for generating matrices with random prediction at k ######################################################################################## @@ -70,7 +100,7 @@ def construct_csr_matrix( ######################################################################################## -def lin_search( +def uniform_search( low: float, high: float, step: float, func: Callable ) -> Tuple[float, float]: best = low @@ -83,23 +113,6 @@ def lin_search( return best, best_val -def bin_search( - low: float, high: float, eps: float, func: Callable -) -> Tuple[float, float]: - while high - low > eps: - mid = (low + high) / 2 - mid_next = (mid + high) / 2 - - if func(mid) < func(mid_next): - high = mid_next - else: - low = mid - - best = (low + high) / 2 - best_val = func(best) - return best, best_val - - def ternary_search( low: float, high: float, eps: float, func: Callable ) -> Tuple[float, float]: diff --git a/xcolumns/weighted_prediction.py b/xcolumns/weighted_prediction.py index 26c8736..c823808 100644 --- a/xcolumns/weighted_prediction.py +++ b/xcolumns/weighted_prediction.py @@ -1,5 +1,5 @@ from time import time -from typing import Optional, Union +from typing import Optional, Tuple, Union import numpy as np from scipy.sparse import csr_matrix @@ -85,6 +85,11 @@ def _predict_weighted_per_instance_csr( b: Optional[np.ndarray] = None, dtype: Optional[np.dtype] = None, ) -> csr_matrix: + if a is not None and a.dtype != y_proba.dtype: + a = a.astype(y_proba.dtype) + if b is not None and b.dtype != y_proba.dtype: + b = b.astype(y_proba.dtype) + n, m = y_proba.shape ( y_pred_data, @@ -93,7 +98,9 @@ def _predict_weighted_per_instance_csr( ) = numba_predict_weighted_per_instance_csr( *unpack_csr_matrix(y_proba), k, th, a, b ) - return csr_matrix((y_pred_data, y_pred_indices, y_pred_indptr), shape=(n, m)) + return csr_matrix( + (y_pred_data, y_pred_indices, y_pred_indptr), shape=(n, m), dtype=dtype + ) def predict_weighted_per_instance( @@ -104,7 +111,7 @@ def predict_weighted_per_instance( b: Optional[DenseMatrix] = None, dtype: Optional[DType] = None, return_meta: bool = False, -) -> Matrix: +) -> Union[Matrix, Tuple[Matrix, dict]]: """ Predict ... 
TODO: Add description """ @@ -176,7 +183,7 @@ def predict_top_k( y_proba: Matrix, k: int, return_meta: bool = False, -): +) -> Union[Matrix, Tuple[Matrix, dict]]: return predict_weighted_per_instance(y_proba, k=k, return_meta=return_meta) @@ -187,8 +194,7 @@ def predict_for_optimal_macro_recall( # (for population) priors: DenseMatrix, epsilon: float = 1e-6, return_meta: bool = False, - **kwargs, -): +) -> Union[Matrix, Tuple[Matrix, dict]]: if priors.shape[0] != y_proba.shape[1]: raise ValueError("priors must be of shape (y_proba[1],)") @@ -203,8 +209,7 @@ def predict_inv_propensity_weighted_instance( k: int, inv_ps: DenseMatrix, return_meta: bool = False, - **kwargs, -): +) -> Union[Matrix, Tuple[Matrix, dict]]: if inv_ps.shape[0] != y_proba.shape[1]: raise ValueError("inv_ps must be of shape (y_proba[1],)") @@ -219,8 +224,7 @@ def predict_log_weighted_per_instance( priors: DenseMatrix, epsilon: float = 1e-6, return_meta: bool = False, - **kwargs, -): +) -> Union[Matrix, Tuple[Matrix, dict]]: if priors.shape[0] != y_proba.shape[1]: raise ValueError("priors must be of shape (y_proba[1],)") @@ -236,8 +240,7 @@ def predict_sqrt_weighted_instance( priors: DenseMatrix, epsilon: float = 1e-6, return_meta: bool = False, - **kwargs, -): +) -> Union[Matrix, Tuple[Matrix, dict]]: if priors.shape[0] != y_proba.shape[1]: raise ValueError("priors must be of shape (y_proba[1],)") @@ -254,8 +257,7 @@ def predict_power_law_weighted_instance( beta: float, epsilon: float = 1e-6, return_meta: bool = False, - **kwargs, -): +) -> Union[Matrix, Tuple[Matrix, dict]]: if priors.shape[0] != y_proba.shape[1]: raise ValueError("priors must be of shape (y_proba[1],)") @@ -269,8 +271,7 @@ def predict_for_optimal_instance_precision( y_proba: Matrix, k: int, return_meta: bool = False, - **kwargs, -): +) -> Union[Matrix, Tuple[Matrix, dict]]: return predict_top_k(y_proba, k=k, return_meta=return_meta) @@ -279,7 +280,7 @@ def _predict_for_optimal_macro_balanced_accuracy_np( k: int, priors: DenseMatrix, epsilon: float = 1e-6, -): +) -> Matrix: n, m = y_proba.shape assert priors.shape == (m,) priors = priors + epsilon @@ -299,7 +300,7 @@ def _predict_for_optimal_macro_balanced_accuracy_csr( k: int, priors: DenseMatrix, epsilon: float = 1e-6, -): +) -> Matrix: n, m = y_proba.shape assert priors.shape == (m,) priors = priors + epsilon @@ -320,8 +321,7 @@ def predict_for_optimal_macro_balanced_accuracy( # (for population) priors: DenseMatrix, epsilon: float = 1e-6, return_meta: bool = False, - **kwargs, -): +) -> Union[Matrix, Tuple[Matrix, dict]]: if priors.shape[0] != y_proba.shape[1]: raise ValueError("priors must be of shape (y_proba[1],)")
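
Usage sketch (appendix, not part of the patch): a minimal example of the 0.0.2 API surface touched above — the renamed metric_func argument, the removed grad_func keyword (the gradient backend now follows the input type), the "uniform" alpha search (formerly "lin"), and predict_weighted_per_instance returning a (y_pred, meta) pair when return_meta=True. The import path of macro_fmeasure_on_conf_matrix and the data sizes are assumptions for illustration, not taken from the diff.

import numpy as np

from xcolumns.frank_wolfe import find_optimal_randomized_classifier_using_frank_wolfe
from xcolumns.metrics import macro_fmeasure_on_conf_matrix  # assumed module path
from xcolumns.weighted_prediction import predict_weighted_per_instance

rng = np.random.default_rng(2024)
n, m, k = 1000, 50, 5  # illustrative sizes, not the test configuration
y_true = (rng.random((n, m)) < 0.1).astype(np.float32)
y_proba = rng.random((n, m)).astype(np.float32)

# Weighted per-instance prediction; with return_meta=True it returns (y_pred, meta).
a = rng.random(m).astype(np.float32)
b = np.zeros(m, dtype=np.float32)
y_pred, meta = predict_weighted_per_instance(y_proba, k, a=a, b=b, return_meta=True)
assert (y_pred.sum(axis=1) == k).all()
print(f"weighted prediction took {meta['time']}s")

# Frank-Wolfe search for a randomized classifier maximizing macro F-measure.
rnd_clf, fw_meta = find_optimal_randomized_classifier_using_frank_wolfe(
    y_true,
    y_proba,
    macro_fmeasure_on_conf_matrix,
    k,
    alpha_search_algo="uniform",  # renamed from "lin"
    return_meta=True,
    seed=2024,
    verbose=True,
)
y_pred_fw = rnd_clf.predict(y_proba, seed=2024)
print(f"Frank-Wolfe finished in {fw_meta['time']}s after {fw_meta['iters']} iterations")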