diff --git a/tests/conftest.py b/tests/conftest.py index 357619d..b634189 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -11,10 +11,11 @@ @pytest.fixture(autouse=True, scope="session") def generated_test_data(): seed = 2024 + max_top_k = 5 x, y = make_multilabel_classification( n_samples=100000, n_features=100, - n_classes=20, + n_classes=50, n_labels=3, length=50, allow_unlabeled=True, @@ -31,12 +32,24 @@ def generated_test_data(): ) clf = MultiOutputClassifier(LogisticRegression()).fit(x_train, y_train) - def clf_predict(clf, x): + def clf_predict(clf, x, sparsify=False): """ Process the output of a multioutput classifier to get the marginal probabilities of each label (class). """ y_proba = clf.predict_proba(x) - return np.array(y_proba)[:, :, 1].transpose() + + # Convert the output of MultiOutputClassifier, which contains the marginal probabilities for both P(y=0) and P(y=1), to a matrix of just P(y=1) + y_proba = np.array(y_proba)[:, :, 1].transpose() + + # Sparsify the matrix + if sparsify: + top_k_thr = -np.partition(-y_proba, max_top_k, axis=1)[:, max_top_k] + top_k_thr = top_k_thr.reshape((-1,)) + top_k_thr[top_k_thr >= 0.1] = 0.1 + y_proba[y_proba < top_k_thr[:, None]] = 0 + assert ((y_proba > 0).sum(axis=1) >= max_top_k).all() + + return y_proba y_proba_train = clf_predict(clf, x_train) y_proba_val = clf_predict(clf, x_val) diff --git a/tests/test_frank_wolfe.py b/tests/test_frank_wolfe.py index b6790df..5de45d4 100644 --- a/tests/test_frank_wolfe.py +++ b/tests/test_frank_wolfe.py @@ -9,16 +9,22 @@ def _run_frank_wolfe(y_val, y_proba_val, y_test, y_proba_test, k, init_a, init_b): + print(f"input dtype={y_proba_val.dtype}") + if isinstance(y_proba_val, csr_matrix): + print( + f" csr_matrix nnz={y_proba_val.nnz}, shape={y_proba_val.shape}, sparsity={y_proba_val.nnz / y_proba_val.shape[0] / y_proba_val.shape[1]}" + ) rnd_clf, meta = find_optimal_randomized_classifier_using_frank_wolfe( y_val, y_proba_val, macro_fmeasure_on_conf_matrix, k, - grad_func="torch", return_meta=True, seed=2024, init_classifier=(init_a, init_b), + verbose=True, ) + print(f" time={meta['time']}s") y_pred = rnd_clf.predict(y_proba_test, seed=2024) assert type(y_pred) == type(y_proba_test) @@ -84,6 +90,9 @@ def test_frank_wolfe(generated_test_data): assert np_C == csr_C == torch_C # Compare with top_k - assert macro_fmeasure_on_conf_matrix( - np_C.tp, np_C.fp, np_C.fn, np_C.tn - ) > macro_fmeasure_on_conf_matrix(top_k_C.tp, top_k_C.fp, top_k_C.fn, top_k_C.tn) + fw_score = macro_fmeasure_on_conf_matrix(np_C.tp, np_C.fp, np_C.fn, np_C.tn) + top_k_score = macro_fmeasure_on_conf_matrix( + top_k_C.tp, top_k_C.fp, top_k_C.fn, top_k_C.tn + ) + print(f"fw_score={fw_score}, top_k_score={top_k_score}") + assert fw_score >= top_k_score diff --git a/tests/test_weighted_prediction.py b/tests/test_weighted_prediction.py index 1ef0c25..c483b21 100644 --- a/tests/test_weighted_prediction.py +++ b/tests/test_weighted_prediction.py @@ -6,12 +6,23 @@ from xcolumns.weighted_prediction import predict_weighted_per_instance -def _run_weighted_prediction(y_true, y_proba, k, a, b): - y_pred = predict_weighted_per_instance(y_proba, k, a=a, b=b) +def _run_and_test_weighted_prediction(y_true, y_proba, k, a, b): + print(f"input dtype={y_proba.dtype}") + if isinstance(y_proba, csr_matrix): + print( + f" csr_matrix nnz={y_proba.nnz}, shape={y_proba.shape}, sparsity={y_proba.nnz / y_proba.shape[0] / y_proba.shape[1]}" + ) + y_pred, meta = predict_weighted_per_instance(y_proba, k, a=a, b=b, return_meta=True) + print(f" 
time={meta['time']}s") + assert type(y_pred) == type(y_proba) + assert y_pred.dtype == y_proba.dtype assert (y_pred.sum(axis=1) == k).all() - return calculate_confusion_matrix(y_true, y_pred, normalize=False, skip_tn=False) + return ( + calculate_confusion_matrix(y_true, y_pred, normalize=False, skip_tn=False), + y_pred, + ) def test_weighted_prediction(generated_test_data): @@ -30,18 +41,27 @@ def test_weighted_prediction(generated_test_data): # Generate random weights a = np.random.rand(y_proba_train.shape[1]) - b = np.random.rand(y_proba_train.shape[1]) + # b = np.random.rand(y_proba_train.shape[1]) + b = np.zeros(y_proba_train.shape[1]) # Run numpy implementation - np_C = _run_weighted_prediction(y_test, y_proba_test, k, a, b) + y_proba_test = y_proba_test.astype(np.float64) + np_C_64, np_pred_64 = _run_and_test_weighted_prediction( + y_test, y_proba_test, k, a, b + ) + + y_proba_test = y_proba_test.astype(np.float32) + np_C_32, np_pred_32 = _run_and_test_weighted_prediction( + y_test, y_proba_test, k, a, b + ) # Run csr_matrix implementation - csr_C = _run_weighted_prediction( + csr_C, csr_pred = _run_and_test_weighted_prediction( csr_matrix(y_test), csr_matrix(y_proba_test), k, a, b ) # Run torch implementation - torch_C = _run_weighted_prediction( + torch_C, torch_pred = _run_and_test_weighted_prediction( torch.tensor(y_test), torch.tensor(y_proba_test), k, @@ -56,4 +76,4 @@ torch_C.tn = torch_C.tn.numpy() # Compare if all implementations are equal - assert np_C == csr_C == torch_C + assert np_C_64 == np_C_32 == csr_C == torch_C diff --git a/xcolumns/__init__.py b/xcolumns/__init__.py index f102a9c..3b93d0b 100644 --- a/xcolumns/__init__.py +++ b/xcolumns/__init__.py @@ -1 +1 @@ -__version__ = "0.0.1" +__version__ = "0.0.2" diff --git a/xcolumns/block_coordinate.py b/xcolumns/block_coordinate.py index f92b477..f472c9c 100644 --- a/xcolumns/block_coordinate.py +++ b/xcolumns/block_coordinate.py @@ -15,14 +15,14 @@ from .weighted_prediction import predict_top_k -def _get_utility_aggregation_func(utility_aggregation: str): - if utility_aggregation == "mean": +def _get_metric_aggregation_func(metric_aggregation: str): + if metric_aggregation == "mean": return np.mean - elif utility_aggregation == "sum": + elif metric_aggregation == "sum": return np.sum else: raise ValueError( - f"Unsupported utility aggregation function: {utility_aggregation}, must be either 'mean' or 'sum'" + f"Unsupported metric aggregation function: {metric_aggregation}, must be either 'mean' or 'sum'" ) @@ -52,36 +52,36 @@ def _get_initial_y_pred( def _calculate_utility( - bin_utility_func: Union[Callable, list[Callable]], - utility_aggregation_func: Callable, + bin_metric_func: Union[Callable, list[Callable]], + metric_aggregation_func: Callable, Etp: np.ndarray, Efp: np.ndarray, Efn: np.ndarray, Etn: np.ndarray, ) -> np.ndarray: - if callable(bin_utility_func): - bin_utilities = bin_utility_func(Etp, Efp, Efn, Etn) + if callable(bin_metric_func): + bin_utilities = bin_metric_func(Etp, Efp, Efn, Etn) else: bin_utilities = np.array( - [f(Etp[i], Efp[i], Efn[i], Etn[i]) for i, f in enumerate(bin_utility_func)] + [f(Etp[i], Efp[i], Efn[i], Etn[i]) for i, f in enumerate(bin_metric_func)] ) # Validate bin utilities here to omit unnecessary calculations later in _calculate_binary_gains if not isinstance(bin_utilities, np.ndarray): raise ValueError( - f"bin_utility_func must return np.ndarray, but returned {type(bin_utilities)}" + f"bin_metric_func must return 
np.ndarray, but returned {type(bin_utilities)}" ) if bin_utilities.shape != (Etp.shape[0],): raise ValueError( - f"bin_utility_func must return np.ndarray of shape {Etp.shape[0]}, but returned {bin_utilities.shape}" + f"bin_metric_func must return np.ndarray of shape {Etp.shape[0]}, but returned {bin_utilities.shape}" ) - return utility_aggregation_func(bin_utilities) + return metric_aggregation_func(bin_utilities) def _calculate_binary_gains( - bin_utility_func, + bin_metric_func, pos_Etp: np.ndarray, pos_Efp: np.ndarray, pos_Efn: np.ndarray, @@ -91,20 +91,20 @@ def _calculate_binary_gains( neg_Efn: np.ndarray, neg_Etn: np.ndarray, ) -> np.ndarray: - if callable(bin_utility_func): - pos_utility = bin_utility_func(pos_Etp, pos_Efp, pos_Efn, pos_Etn) - neg_utility = bin_utility_func(neg_Etp, neg_Efp, neg_Efn, neg_Etn) + if callable(bin_metric_func): + pos_utility = bin_metric_func(pos_Etp, pos_Efp, pos_Efn, pos_Etn) + neg_utility = bin_metric_func(neg_Etp, neg_Efp, neg_Efn, neg_Etn) else: pos_utility = np.array( [ f(pos_Etp[i], pos_Efp[i], pos_Efn[i], pos_Etn[i]) - for i, f in enumerate(bin_utility_func) + for i, f in enumerate(bin_metric_func) ] ) neg_utility = np.array( [ f(neg_Etp[i], neg_Efp[i], neg_Efn[i], neg_Etn[i]) - for i, f in enumerate(bin_utility_func) + for i, f in enumerate(bin_metric_func) ] ) @@ -120,7 +120,7 @@ def bc_with_0approx_step_np( Efn: np.ndarray, Etn: np.ndarray, k: int, - bin_utility_func: Union[Callable, list[Callable]], + bin_metric_func: Union[Callable, list[Callable]], greedy: bool = False, maximize: bool = True, only_pred: bool = False, @@ -149,7 +149,7 @@ neg_Etn = Etn + (1 - y_proba_i) gains = _calculate_binary_gains( - bin_utility_func, + bin_metric_func, pos_Etp / n, pos_Efp / n, Efn / n, @@ -191,7 +191,7 @@ def bc_with_0approx_step_csr( Efn: np.ndarray, Etn: np.ndarray, k: int, - bin_utility_func: Union[Callable, list[Callable]], + bin_metric_func: Union[Callable, list[Callable]], greedy: bool = False, maximize: bool = True, only_pred: bool = False, @@ -233,7 +233,7 @@ # Calculate gain and selection gains = _calculate_binary_gains( - bin_utility_func, + bin_metric_func, pos_Etpp, pos_Efpp, pos_Efn, @@ -262,20 +262,20 @@ def predict_using_bc_with_0approx( y_proba: Union[np.ndarray, csr_matrix], - bin_utility_func: Union[Callable, list[Callable]], + bin_metric_func: Union[Callable, list[Callable]], k: int, - utility_aggregation: str = "mean", # "mean" or "sum" + metric_aggregation: str = "mean", # "mean" or "sum" + maximize=True, tolerance: float = 1e-6, init_y_pred: Union[ str, np.ndarray, csr_matrix ] = "random", # "random", "topk", "greedy", np.ndarray or csr_matrix max_iter: int = 100, shuffle_order: bool = True, - maximize=True, skip_tn=False, - seed: int = None, - verbose: bool = False, return_meta: bool = False, + seed: Optional[int] = None, + verbose: bool = False, **kwargs, ) -> Union[np.ndarray, csr_matrix]: """ @@ -286,6 +286,11 @@ def predict_using_bc_with_0approx( However both algorithms are equivalent. 
""" + log_info( + f"Starting optimization of ETU utility metric block coordinate {'ascent' if maximize else 'descent'} algorithm ...", + verbose, + ) + # Initialize the meta data dictionary meta = {"utilities": [], "iters": 0, "time": time()} @@ -299,23 +304,21 @@ def predict_using_bc_with_0approx( else: raise ValueError("y_proba must be either np.ndarray or csr_matrix") - # y_proba is ndarray or csr_matrix n, m = y_proba.shape # Get aggregation function - utility_aggregation_func = _get_utility_aggregation_func(utility_aggregation) + metric_aggregation_func = _get_metric_aggregation_func(metric_aggregation) # Initialize the prediction matrix - print(f" Initializing starting prediction ...") + log_info(f" Initializing initial prediction ...", verbose) greedy = init_y_pred == "greedy" y_pred = _get_initial_y_pred(y_proba, init_y_pred, k, random_at_k_func) - print(y_pred.shape, y_proba.shape) # Initialize the instance order and set seed for shuffling rng = np.random.default_rng(seed) order = np.arange(n) for j in range(1, max_iter + 1): - print(f" Starting iteration {j} ...") + log_info(f" Starting iteration {j}/{max_iter} ...", verbose) if shuffle_order: rng.shuffle(order) @@ -328,14 +331,14 @@ def predict_using_bc_with_0approx( Efn = np.zeros(m, dtype=FLOAT_TYPE) Etn = np.zeros(m, dtype=FLOAT_TYPE) else: - print(" Calculating expected confusion matrix ...") + log_info(" Calculating expected confusion matrix ...", verbose) Etp, Efp, Efn, Etn = calculate_confusion_matrix( y_proba, y_pred, normalize=False, skip_tn=skip_tn ) old_utility = _calculate_utility( - bin_utility_func, - utility_aggregation_func, + bin_matric_func, + metric_aggregation_func, Etp / n, Efp / n, Efn / n, @@ -352,15 +355,15 @@ def predict_using_bc_with_0approx( Efn, Etn, k, - bin_utility_func, + bin_matric_func, greedy=greedy, maximize=maximize, skip_tn=skip_tn, ) new_utility = _calculate_utility( - bin_utility_func, - utility_aggregation_func, + bin_matric_func, + metric_aggregation_func, Etp / n, Efp / n, Efn / n, @@ -371,12 +374,14 @@ def predict_using_bc_with_0approx( meta["iters"] = j meta["utilities"].append(new_utility) - print( - f" Iteration {j} finished, expected utility: {old_utility} -> {new_utility}" + log_info( + f" Iteration {j}/{max_iter} finished, expected metric value: {old_utility} -> {new_utility}", + verbose, ) if abs(new_utility - old_utility) < tolerance: - print( - f" Stopping because improvement of expected utility is smaller than {tolerance}" + log_info( + f" Stopping because improvement of expected metric value is smaller than {tolerance}", + verbose, ) break @@ -500,9 +505,9 @@ def predict_optimizing_coverage_using_bc( ] = "random", # "random", "topk", "random", or csr_matrix max_iter: int = 100, shuffle_order: bool = True, + return_meta: bool = False, seed: int = None, verbose: bool = False, - return_meta: bool = False, **kwargs, ): """ @@ -528,7 +533,7 @@ def predict_optimizing_coverage_using_bc( raise ValueError("y_proba must be either np.ndarray or csr_matrix") # Initialize the prediction matrix - print(f" Initializing starting prediction ...") + log_info(f" Initializing starting prediction ...", verbose) greedy = init_y_pred == "greedy" y_pred = _get_initial_y_pred(y_proba, init_y_pred, k, random_at_k_func) @@ -538,7 +543,7 @@ def predict_optimizing_coverage_using_bc( rng = np.random.default_rng(seed) order = np.arange(n) for j in range(1, max_iter + 1): - print(f" Starting iteration {j} ...") + log_info(f" Starting iteration {j}/{max_iter} ...", verbose) if shuffle_order: 
rng.shuffle(order) @@ -564,8 +569,15 @@ meta["iters"] = j meta["utilities"].append(new_cov) - print(f" Iteration {j} finished, expected coverage: {old_cov} -> {new_cov}") + log_info( + f" Iteration {j}/{max_iter} finished, expected coverage: {old_cov} -> {new_cov}", + verbose, + ) if new_cov <= old_cov + tolerance: + log_info( + f" Stopping because improvement of expected coverage is smaller than {tolerance}", + verbose, + ) break if return_meta: diff --git a/xcolumns/confusion_matrix.py b/xcolumns/confusion_matrix.py index a5f7bc5..80734dc 100644 --- a/xcolumns/confusion_matrix.py +++ b/xcolumns/confusion_matrix.py @@ -1,4 +1,4 @@ -from typing import Union, Optional +from typing import Optional, Union import numpy as np from scipy.sparse import csr_matrix @@ -12,25 +12,53 @@ # Confusion matrix class ######################################################################################## + class ConfusionMatrix: """ Class representing a confusion matrix. When unpacked returns (tp, fp, fn, tn) (in this order). """ - def __init__(self, tp: Union[Number, DenseMatrix], fp: Union[Number, DenseMatrix], fn: Union[Number, DenseMatrix], tn: Union[Number, DenseMatrix]): + + def __init__( + self, + tp: Union[Number, DenseMatrix], + fp: Union[Number, DenseMatrix], + fn: Union[Number, DenseMatrix], + tn: Union[Number, DenseMatrix], + ): self.tp = tp self.fp = fp self.fn = fn self.tn = tn - # For compatibility with sklearn confusion matrix + # For compatibility with methods that expect a tuple def __iter__(self): yield self.tp yield self.fp yield self.fn yield self.tn + def __eq__(self, other: object) -> bool: + if not isinstance(other, ConfusionMatrix): + return False + + if isinstance(self.tp, Number): + return ( + self.tp == other.tp + and self.fp == other.fp + and self.fn == other.fn + and self.tn == other.tn + ) + elif isinstance(self.tp, DenseMatrix): + return ( + (self.tp == other.tp).all() + and (self.fp == other.fp).all() + and (self.fn == other.fn).all() + and (self.tn == other.tn).all() + ) + return False + def __add__(self, other: "ConfusionMatrix") -> "ConfusionMatrix": return ConfusionMatrix( self.tp + other.tp, @@ -38,14 +66,14 @@ def __add__(self, other: "ConfusionMatrix") -> "ConfusionMatrix": self.fn + other.fn, self.tn + other.tn, ) - + def __iadd__(self, other: "ConfusionMatrix") -> "ConfusionMatrix": self.tp += other.tp self.fp += other.fp self.fn += other.fn self.tn += other.tn return self - + def __sub__(self, other: "ConfusionMatrix") -> "ConfusionMatrix": return ConfusionMatrix( self.tp - other.tp, @@ -53,14 +81,14 @@ def __sub__(self, other: "ConfusionMatrix") -> "ConfusionMatrix": self.fn - other.fn, self.tn - other.tn, ) - + def __isub__(self, other: "ConfusionMatrix") -> "ConfusionMatrix": self.tp -= other.tp self.fp -= other.fp self.fn -= other.fn self.tn -= other.tn return self - + def __mul__(self, other: Union[Number, DenseMatrix]) -> "ConfusionMatrix": return ConfusionMatrix( self.tp * other, @@ -68,14 +96,14 @@ def __mul__(self, other: Union[Number, DenseMatrix]) -> "ConfusionMatrix": self.fn * other, self.tn * other, ) - + def __imul__(self, other: Union[Number, DenseMatrix]) -> "ConfusionMatrix": self.tp *= other self.fp *= other self.fn *= other self.tn *= other return self - + def __truediv__(self, other: Union[Number, DenseMatrix]) -> "ConfusionMatrix": return ConfusionMatrix( self.tp / other, @@ -83,14 +111,14 @@ def __truediv__(self, other: Union[Number, DenseMatrix]) -> "ConfusionMatrix": self.fn / other, self.tn / other, ) - + def 
__itruediv__(self, other: Union[Number, DenseMatrix]) -> "ConfusionMatrix": self.tp /= other self.fp /= other self.fn /= other self.tn /= other return self - + def __floordiv__(self, other: Union[Number, DenseMatrix]) -> "ConfusionMatrix": return ConfusionMatrix( self.tp // other, @@ -98,14 +126,13 @@ def __floordiv__(self, other: Union[Number, DenseMatrix]) -> "ConfusionMatrix": self.fn // other, self.tn // other, ) - + def __ifloordiv__(self, other: Union[Number, DenseMatrix]) -> "ConfusionMatrix": self.tp //= other self.fp //= other self.fn //= other self.tn //= other return self - ######################################################################################## @@ -196,7 +223,7 @@ def _calculate_conf_mat_entry( val = func(y_true, y_pred) if normalize: - val /= y_true.shape[0] + val = val / y_true.shape[0] return val @@ -258,7 +285,7 @@ def calculate_confusion_matrix( tn = tp.copy() tn[:] = -1 else: - tn = -tp - fp - fn + (1 if normalize else n) + tn = -tp - fp - fn + (1.0 if normalize else n) return ConfusionMatrix(tp, fp, fn, tn) @@ -296,4 +323,4 @@ def update_unnormalized_confusion_matrix( else: raise ValueError( "y_true and y_pred must be both np.ndarray, both torch.Tensor, or both csr_matrix" - ) \ No newline at end of file + ) diff --git a/xcolumns/frank_wolfe.py b/xcolumns/frank_wolfe.py index 7415179..ad30719 100644 --- a/xcolumns/frank_wolfe.py +++ b/xcolumns/frank_wolfe.py @@ -1,9 +1,9 @@ +import logging from time import time -from typing import Callable, Union +from typing import Callable, Optional, Tuple, Union +import autograd import autograd.numpy as np -import torch -from autograd import grad from scipy.sparse import csr_matrix from .confusion_matrix import calculate_confusion_matrix @@ -20,30 +20,84 @@ def _get_grad_as_numpy(t): - return np.zeros(t.shape, dtype=FLOAT_TYPE) + return np.zeros(t.shape, dtype=DefaultDataDType) -def utility_func_with_gradient_torch(utility_func, tp, fp, fn, tn): - if not isinstance(tp, torch.Tensor) or not tp.requires_grad: - tp = torch.tensor(tp, requires_grad=True, dtype=TORCH_FLOAT_TYPE) - fp = torch.tensor(fp, requires_grad=True, dtype=TORCH_FLOAT_TYPE) - fn = torch.tensor(fn, requires_grad=True, dtype=TORCH_FLOAT_TYPE) - tn = torch.tensor(tn, requires_grad=True, dtype=TORCH_FLOAT_TYPE) - utility = utility_func(tp, fp, fn, tn) - utility.backward() - return ( - float(utility), - _get_grad_as_numpy(tp), - _get_grad_as_numpy(fp), - _get_grad_as_numpy(fn), - _get_grad_as_numpy(tn), - ) +_torch_available = False +try: + import torch + + def metric_func_with_gradient_torch_old(metric_func, tp, fp, fn, tn): + if not isinstance(tp, torch.Tensor) or not tp.requires_grad: + tp = torch.tensor(tp, requires_grad=True, dtype=TORCH_FLOAT_TYPE) + fp = torch.tensor(fp, requires_grad=True, dtype=TORCH_FLOAT_TYPE) + fn = torch.tensor(fn, requires_grad=True, dtype=TORCH_FLOAT_TYPE) + tn = torch.tensor(tn, requires_grad=True, dtype=TORCH_FLOAT_TYPE) + utility = metric_func(tp, fp, fn, tn) + utility.backward() + return ( + float(utility), + _get_grad_as_numpy(tp), + _get_grad_as_numpy(fp), + _get_grad_as_numpy(fn), + _get_grad_as_numpy(tn), + ) + + def metric_func_with_gradient_torch(metric_func, tp, fp, fn, tn): + # tp = torch.tensor(tp, requires_grad=True) + # fp = torch.tensor(fp, requires_grad=True) + # fn = torch.tensor(fn, requires_grad=True) + # tn = torch.tensor(tn, requires_grad=True) + tp.requires_grad_(True) + fp.requires_grad_(True) + fn.requires_grad_(True) + tn.requires_grad_(True) + value = metric_func(tp, fp, fn, tn) + tp_grad, fp_grad, fn_grad, tn_grad = torch.autograd.grad( + value, (tp, fp, fn, 
tn), materialize_grads=True, allow_unused=True + ) + return value, tp_grad, fp_grad, fn_grad, tn_grad + + def _predict_using_randomized_classifier_torch( + y_proba: torch.Tensor, + k: int, + classifiers_a: torch.Tensor, + classifiers_b: torch.Tensor, + classifiers_proba: torch.Tensor, + dtype: Optional[torch.dtype] = None, + seed: Optional[int] = None, + ): + rng = np.random.default_rng(seed) + + n, m = y_proba.shape + c = classifiers_proba.shape[0] + classifiers_range = np.arange(c) + y_pred = torch.zeros( + y_proba.shape, + dtype=y_proba.dtype if dtype is None else dtype, + device=y_proba.device, + ) + for i in range(n): + c_i = rng.choice(classifiers_range, p=classifiers_proba) + gains = y_proba[i] * classifiers_a[c_i] + classifiers_b[c_i] + + if k > 0: + _, top_k = torch.topk(gains, k) + y_pred[i, top_k] = 1 + else: + y_pred[i, gains >= 0] = 1 + + return y_pred + + _torch_available = True +except ImportError: + pass -def utility_func_with_gradient_autograd(utility_func, tp, fp, fn, tn): - grad_func = grad(utility_func, argnum=[0, 1, 2, 3]) - return float(utility_func(tp, fp, fn, tn)), *grad_func(tp, fp, fn, tn) +def metric_func_with_gradient_autograd(metric_func, tp, fp, fn, tn): + grad_func = autograd.grad(metric_func, argnum=[0, 1, 2, 3]) + return float(metric_func(tp, fp, fn, tn)), *grad_func(tp, fp, fn, tn) def _find_best_alpha( - utility_func, + metric_func, tp, fp, fn, @@ -52,20 +106,18 @@ def _find_best_alpha( fp_i, fn_i, tn_i, - search_algo="lin", + search_algo="uniform", eps=0.001, - lin_search_step=0.001, -): - conf_mat_comb = lambda alpha: utility_func( + uniform_search_step=0.001, +) -> Tuple[float, float]: + conf_mat_comb = lambda alpha: metric_func( (1 - alpha) * tp + alpha * tp_i, (1 - alpha) * fp + alpha * fp_i, (1 - alpha) * fn + alpha * fn_i, (1 - alpha) * tn + alpha * tn_i, ) - if search_algo == "lin": - return lin_search(0, 1, lin_search_step, conf_mat_comb) - elif search_algo == "bin": - return bin_search(0, 1, eps, conf_mat_comb) + if search_algo == "uniform": + return uniform_search(0, 1, uniform_search_step, conf_mat_comb) elif search_algo == "ternary": return ternary_search(0, 1, eps, conf_mat_comb) else: @@ -75,68 +127,98 @@ def _find_best_alpha( def find_optimal_randomized_classifier_using_frank_wolfe( y_true: Matrix, y_proba: Matrix, - utility_func: Callable, + metric_func: Callable, k: int, max_iters: int = 100, init_classifier: Union[ str, Tuple[DenseMatrix, DenseMatrix] - ] = "random", # or "random" + ] = "random", # or "default", "topk" + maximize=True, search_for_best_alpha: bool = True, - alpha_search_algo: str = "lin", + alpha_search_algo: str = "uniform", # or "ternary" alpha_eps: float = 0.001, - alpha_lin_search_step: float = 0.001, + alpha_uniform_search_step: float = 0.001, skip_tn=False, seed=None, - verbose: bool = True, + verbose: bool = False, return_meta: bool = False, - grad_func="torch", - **kwargs, ): - log = print - if not verbose: - log = lambda *args, **kwargs: None + log_info( + "Starting searching for optimal randomized classifier using Frank-Wolfe algorithm ...", + verbose, + ) - if type(y_true) != type(y_proba): + # Validate y_true and y_proba + if type(y_true) != type(y_proba) and isinstance(y_true, Matrix): raise ValueError( - f"y_true and y_proba have unsupported combination of types {type(y_true)}, {type(y_proba)}, should be both np.ndarray, both torch.Tensor, or both csr_matrix" + f"y_true and y_proba have unsupported combination of types {type(y_true)} and {type(y_proba)}, should be both np.ndarray, both torch.Tensor, or both 
csr_matrix" ) - log("Starting Frank-Wolfe algorithm") - n, m = y_proba.shape + if y_true.shape != y_proba.shape: + raise ValueError( + f"y_true and y_proba must have the same shape, got {y_true.shape} and {y_proba.shape}" + ) - if grad_func == "torch": - utility_func_with_gradient = utility_func_with_gradient_torch - elif grad_func == "autograd": - utility_func_with_gradient = utility_func_with_gradient_autograd - else: - raise ValueError(f"Unsupported grad_func: {grad_func}") + n, m = y_proba.shape + log_info(f" Initializing {init_classifier} classifier ...", verbose) + # Initialize the classifiers matrix rng = np.random.default_rng(seed) - classifiers_a = np.zeros((max_iters, m), dtype=FLOAT_TYPE) - classifiers_b = np.zeros((max_iters, m), dtype=FLOAT_TYPE) - classifiers_proba = np.ones(max_iters, dtype=FLOAT_TYPE) + classifiers_a = np.zeros((max_iters + 1, m), dtype=DefaultDataDType) + classifiers_b = np.zeros((max_iters + 1, m), dtype=DefaultDataDType) + classifiers_proba = np.ones(max_iters + 1, dtype=DefaultDataDType) - log(f" Initializing initial {init_classifier} classifier ...") if init_classifier in ["default", "topk"]: - classifiers_a[0] = np.ones(m, dtype=FLOAT_TYPE) - classifiers_b[0] = np.full(m, -0.5, dtype=FLOAT_TYPE) + classifiers_a[0] = np.ones(m, dtype=DefaultDataDType) + classifiers_b[0] = np.full(m, -0.5, dtype=DefaultDataDType) elif init_classifier == "random": classifiers_a[0] = rng.random(m) classifiers_b[0] = rng.random(m) - 0.5 + elif ( + isinstance(init_classifier, (tuple, list)) + and len(init_classifier) == 2 + and isinstance(init_classifier[0], DenseMatrix) + and isinstance(init_classifier[1], DenseMatrix) + and init_classifier[0].shape == (m,) + and init_classifier[1].shape == (m,) + ): + if _torch_available and isinstance(init_classifier[0], torch.Tensor): + classifiers_a[0] = init_classifier[0].numpy() + else: + classifiers_a[0] = init_classifier[0] + + if _torch_available and isinstance(init_classifier[1], torch.Tensor): + classifiers_b[0] = init_classifier[1].numpy() + else: + classifiers_b[0] = init_classifier[1] else: - raise ValueError(f"Unsupported type of init_classifier: {init_classifier}") + raise ValueError( + f"Unsupported type of init_classifier, it should be 'default', 'topk', 'random', or a tuple of two np.ndarray or torch.Tensor of shape (y_true.shape[1], )" + ) + + # Adjust types according to the type of y_true and y_proba + if isinstance(y_true, (np.ndarray, csr_matrix)): + metric_func_with_gradient = metric_func_with_gradient_autograd + elif _torch_available and isinstance(y_true, torch.Tensor): + metric_func_with_gradient = metric_func_with_gradient_torch + classifiers_a = torch.tensor( + classifiers_a, dtype=y_proba.dtype, device=y_proba.device + ) + classifiers_b = torch.tensor( + classifiers_b, dtype=y_proba.dtype, device=y_proba.device + ) + classifiers_proba = torch.tensor( + classifiers_proba, dtype=y_proba.dtype, device=y_proba.device + ) + y_pred_i = predict_weighted_per_instance( y_proba, k, th=0.0, a=classifiers_a[0], b=classifiers_b[0] ) - log( - f" y_true: {y_true.shape}, y_pred: {y_pred_i.shape}, y_proba: {y_proba.shape}" - ) - tp, fp, fn, tn = calculate_confusion_matrix( y_true, y_pred_i, normalize=True, skip_tn=skip_tn ) - utility_i = float(utility_func(tp, fp, fn, tn)) + utility_i = metric_func(tp, fp, fn, tn) if return_meta: meta = { @@ -148,25 +230,29 @@ find_optimal_randomized_classifier_using_frank_wolfe( meta["utilities"].append(utility_i) meta["classifiers_utilities"].append(utility_i) - for i in range(1, max_iters): 
- log(f" Starting iteration {i} ...") - old_utility, Gtp, Gfp, Gfn, Gtn = utility_func_with_gradient( - utility_func, tp, fp, fn, tn + for i in range(1, max_iters + 1): + log_info(f" Starting iteration {i}/{max_iters} ...", verbose) + old_utility, Gtp, Gfp, Gfn, Gtn = metric_func_with_gradient( + metric_func, tp, fp, fn, tn ) classifiers_a[i] = Gtp - Gfp - Gfn + Gtn classifiers_b[i] = Gfp - Gtn + if not maximize: + classifiers_a[i] *= -1 + classifiers_b[i] *= -1 + y_pred_i = predict_weighted_per_instance( y_proba, k, th=0.0, a=classifiers_a[i], b=classifiers_b[i] ) tp_i, fp_i, fn_i, tn_i = calculate_confusion_matrix( y_true, y_pred_i, normalize=True, skip_tn=skip_tn ) - utility_i = float(utility_func(tp_i, fp_i, fn_i, tn_i)) + utility_i = metric_func(tp_i, fp_i, fn_i, tn_i) if search_for_best_alpha: alpha, _ = _find_best_alpha( - utility_func, + metric_func, tp, fp, fn, @@ -177,7 +263,7 @@ def find_optimal_randomized_classifier_using_frank_wolfe( tn_i, search_algo=alpha_search_algo, eps=alpha_eps, - lin_search_step=alpha_lin_search_step, + uniform_search_step=alpha_uniform_search_step, ) else: alpha = 2 / (i + 1) @@ -189,7 +275,7 @@ def find_optimal_randomized_classifier_using_frank_wolfe( fn = (1 - alpha) * fn + alpha * fn_i tn = (1 - alpha) * tn + alpha * tn_i - new_utility = float(utility_func(tp, fp, fn, tn)) + new_utility = metric_func(tp, fp, fn, tn) if return_meta: meta["alphas"].append(alpha) @@ -197,12 +283,13 @@ def find_optimal_randomized_classifier_using_frank_wolfe( meta["utilities"].append(new_utility) meta["iters"] = i - log( - f" Iteration {i} finished, alpha: {alpha}, utility: {old_utility} -> {new_utility}" + log_info( + f" Iteration {i}/{max_iters} finished, alpha: {alpha}, utility: {old_utility} -> {new_utility}", + verbose, ) if alpha < alpha_eps: - print(f" Stopping because alpha is smaller than {alpha_eps}") + log_info(f" Stopping because alpha is smaller than {alpha_eps}", verbose) # Truncate unused classifiers classifiers_a = classifiers_a[:i] classifiers_b = classifiers_b[:i] @@ -229,26 +316,26 @@ def _predict_using_randomized_classifier_np( classifiers_a: np.ndarray, classifiers_b: np.ndarray, classifiers_proba: np.ndarray, + dtype: Optional[np.dtype] = None, seed: Optional[int] = None, ): - if seed is not None: - np.random.seed(seed) + rng = np.random.default_rng(seed) n, m = y_proba.shape c = classifiers_proba.shape[0] classifiers_range = np.arange(c) - result = np.zeros(y_proba.shape, dtype=FLOAT_TYPE) + y_pred = np.zeros(y_proba.shape, dtype=y_proba.dtype if dtype is None else dtype) for i in range(n): - c_i = np.random.choice(classifiers_range, p=classifiers_proba) + c_i = rng.choice(classifiers_range, p=classifiers_proba) gains = y_proba[i] * classifiers_a[c_i] + classifiers_b[c_i] if k > 0: top_k = np.argpartition(-gains, k)[:k] - result[i, top_k] = 1.0 + y_pred[i, top_k] = 1.0 else: - result[i, gains > 0] = 1.0 + y_pred[i, gains > 0] = 1.0 - return result + return y_pred def _predict_using_randomized_classifier_csr( @@ -257,24 +344,30 @@ def _predict_using_randomized_classifier_csr( classifiers_a: np.ndarray, classifiers_b: np.ndarray, classifiers_proba: np.ndarray, + dtype: Optional[np.dtype] = None, seed: Optional[int] = None, ): - if seed is not None: - np.random.seed(seed) + rng = np.random.default_rng(seed) n, m = y_proba.shape c = classifiers_proba.shape[0] classifiers_range = np.arange(c) initial_row_size = k if k > 0 else 10 - y_pred_data = np.ones(n * initial_row_size, dtype=FLOAT_TYPE) - y_pred_indices = np.zeros(n * initial_row_size, 
dtype=IND_TYPE) - y_pred_indptr = np.arange(n + 1, dtype=IND_TYPE) * initial_row_size + y_pred_data = np.ones( + n * initial_row_size, dtype=y_proba.data.dtype if dtype is None else dtype + ) + y_pred_indices = np.zeros(n * initial_row_size, dtype=y_proba.indices.dtype) + y_pred_indptr = np.arange(n + 1, dtype=y_proba.indptr.dtype) * initial_row_size # TODO: Can be further optimized in numba for i in range(n): - c_i = np.random.choice(classifiers_range, p=classifiers_proba) - y_pred_data, y_pred_indices, y_pred_indptr = numba_predict_weighted_per_instance_csr_step( + c_i = rng.choice(classifiers_range, p=classifiers_proba) + ( + y_pred_data, + y_pred_indices, + y_pred_indptr, + ) = numba_predict_weighted_per_instance_csr_step( y_pred_data, y_pred_indices, y_pred_indptr, @@ -288,7 +381,7 @@ def _predict_using_randomized_classifier_csr( classifiers_b[c_i], ) - y_pred_data = np.ones(y_pred_indices.size, dtype=FLOAT_TYPE) + # y_pred_data = np.ones(y_pred_indices.size, dtype=FLOAT_TYPE) return csr_matrix((y_pred_data, y_pred_indices, y_pred_indptr), shape=(n, m)) @@ -299,8 +392,9 @@ def predict_using_randomized_classifier( classifiers_a: DenseMatrix, classifiers_b: DenseMatrix, classifiers_proba: DenseMatrix, + dtype: Optional[DType] = None, seed: Optional[int] = None, -): +) -> Matrix: # Validate arguments # y_proba @@ -343,13 +437,37 @@ def predict_using_randomized_classifier( ) if isinstance(y_proba, np.ndarray): - return _predict_using_randomized_classifier_np( - y_proba, k, classifiers_a, classifiers_b, classifiers_proba, seed=seed + y_pred = _predict_using_randomized_classifier_np( + y_proba, + k, + classifiers_a, + classifiers_b, + classifiers_proba, + dtype=dtype, + seed=seed, ) elif isinstance(y_proba, csr_matrix): - return _predict_using_randomized_classifier_csr( - y_proba, k, classifiers_a, classifiers_b, classifiers_proba, seed=seed + y_pred = _predict_using_randomized_classifier_csr( + y_proba, + k, + classifiers_a, + classifiers_b, + classifiers_proba, + dtype=dtype, + seed=seed, ) + elif _torch_available and isinstance(y_proba, torch.Tensor): + y_pred = _predict_using_randomized_classifier_torch( + y_proba, + k, + classifiers_a, + classifiers_b, + classifiers_proba, + dtype=dtype, + seed=seed, + ) + + return y_pred class RandomizedWeightedClassifier: diff --git a/xcolumns/numba_csr_functions.py b/xcolumns/numba_csr_functions.py index d78caa3..c386838 100644 --- a/xcolumns/numba_csr_functions.py +++ b/xcolumns/numba_csr_functions.py @@ -9,11 +9,11 @@ @njit(cache=True) -def numba_first_k(n: int, k: int): - y_pred_data = np.ones(n * k, dtype=FLOAT_TYPE) - y_pred_indices = np.zeros(n * k, dtype=IND_TYPE) - y_pred_indptr = np.zeros(n + 1, dtype=IND_TYPE) - first_k = np.arange(k, dtype=IND_TYPE) +def numba_first_k(n: int, k: int) -> CSRMatrixAsTuple: + y_pred_data = np.ones(n * k, dtype=DefaultDataDType) + y_pred_indices = np.zeros(n * k, dtype=DefaultIndDType) + y_pred_indptr = np.zeros(n + 1, dtype=DefaultIndDType) + first_k = np.arange(k, dtype=DefaultIndDType) for i in range(n): y_pred_indices[i * k : (i + 1) * k] = first_k y_pred_indptr[i + 1] = y_pred_indptr[i] + k @@ -28,7 +28,7 @@ def numba_random_at_k_from( m: int, k: int, seed: int = None, -): +) -> CSRMatrixAsTuple: """ Selects k random labels for each instance. 
""" @@ -36,23 +36,18 @@ def numba_random_at_k_from( if seed is not None: np.random.seed(seed) - y_pred_data = np.ones(n * k, dtype=FLOAT_TYPE) - y_pred_indices = np.zeros(n * k, dtype=IND_TYPE) - y_pred_indptr = np.zeros(n + 1, dtype=IND_TYPE) - labels_range = np.arange(m, dtype=IND_TYPE) + y_pred_data = np.ones(n * k, dtype=DefaultDataDType) + y_pred_indices = np.zeros(n * k, dtype=y_proba_indices) + y_pred_indptr = np.zeros(n + 1, dtype=y_proba_indptr) + labels_range = np.arange(m, dtype=y_proba_indices) for i in range(n): row_indices = y_proba_indices[y_proba_indptr[i] : y_proba_indptr[i + 1]] if row_indices.size >= k: - # y_pred_indices[i * k : (i + 1) * k] = np.random.choice( - # row_indices, k, replace=False - # ) + # This is faster then above np.random.choice(..., replace=False) y_pred_indices[i * k : (i + 1) * k] = numba_fast_random_choice( row_indices, k ) else: - # y_pred_indices[i * k : (i + 1) * k] = np.random.choice( - # labels_range, k, replace=False - # ) y_pred_indices[i * k : (i + 1) * k] = numba_fast_random_choice( labels_range, k ) @@ -62,14 +57,14 @@ def numba_random_at_k_from( @njit(cache=True) -def numba_fast_random_choice(array, k=-1): +def numba_fast_random_choice(array, k=-1) -> np.ndarray: """ Selects k random elements from array. """ n = array.size if k < 0: k = array.size - index = np.arange(n, dtype=IND_TYPE) + index = np.arange(n, dtype=DefaultIndDType) for i in range(k): j = random.randint(i, n - 1) index[i], index[j] = index[j], index[i] @@ -77,7 +72,7 @@ def numba_fast_random_choice(array, k=-1): @njit(cache=True) -def numba_random_at_k(n: int, m: int, k: int, seed: int = None): +def numba_random_at_k(n: int, m: int, k: int, seed: int = None) -> CSRMatrixAsTuple: """ Selects k random labels for each instance. """ @@ -86,14 +81,11 @@ def numba_random_at_k(n: int, m: int, k: int, seed: int = None): # np.random.seed(seed) np.random.choice seems to be quite slow here random.seed(seed) - y_pred_data = np.ones(n * k, dtype=FLOAT_TYPE) - y_pred_indices = np.zeros(n * k, dtype=IND_TYPE) - y_pred_indptr = np.zeros(n + 1, dtype=IND_TYPE) - labels_range = np.arange(m, dtype=IND_TYPE) + y_pred_data = np.ones(n * k, dtype=DefaultDataDType) + y_pred_indices = np.zeros(n * k, dtype=DefaultIndDType) + y_pred_indptr = np.zeros(n + 1, dtype=DefaultIndDType) + labels_range = np.arange(m, dtype=DefaultIndDType) for i in range(n): - # y_pred_indices[i * k : (i + 1) * k] = np.random.choice( - # labels_range, k, replace=False - # ) y_pred_indices[i * k : (i + 1) * k] = numba_fast_random_choice(labels_range, k) y_pred_indptr[i + 1] = y_pred_indptr[i] + k @@ -111,8 +103,8 @@ def numba_csr_vec_mul_vec( """ i = j = k = 0 new_data_size = min(a_data.size, b_data.size) - new_data = np.zeros(new_data_size, dtype=FLOAT_TYPE) - new_indices = np.zeros(new_data_size, dtype=IND_TYPE) + new_data = np.zeros(new_data_size, dtype=a_data.dtype) + new_indices = np.zeros(new_data_size, dtype=a_indices.dtype) while i < a_indices.size and j < b_indices.size: # print(i, j, k, a_indices[i], b_indices[j], a_indices.size, b_indices.size) if a_indices[i] < b_indices[j]: @@ -144,7 +136,7 @@ def numba_calculate_sum_0_csr_mat_mul_mat( Gives the same result as a.multiply(b).sum(axis=0) where a and b are sparse vectors. Requires a and b to have sorted y_proba_indices (in ascending order). 
""" - result = np.zeros(m, dtype=FLOAT_TYPE) + result = np.zeros(m, dtype=a_data.dtype) for i in range(n): a_start, a_end = a_indptr[i], a_indptr[i + 1] b_start, b_end = b_indptr[i], b_indptr[i + 1] @@ -172,8 +164,8 @@ def numba_csr_vec_mul_ones_minus_vec( """ i = j = k = 0 new_data_size = a_data.size + b_data.size - new_data = np.zeros(new_data_size, dtype=FLOAT_TYPE) - new_indices = np.zeros(new_data_size, dtype=IND_TYPE) + new_data = np.zeros(new_data_size, dtype=a_data.dtype) + new_indices = np.zeros(new_data_size, dtype=a_indices.dtype) while i < a_indices.size: # print(i, j, k, a_indices[i], b_indices[j], a_indices.size, b_indices.size) if j >= b_indices.size or a_indices[i] < b_indices[j]: @@ -209,7 +201,7 @@ def numba_calculate_sum_0_csr_mat_mul_ones_minus_mat( Gives the same result as a.multiply(ones - b).sum(axis=0) where a and b are sparse matrices but makes it more efficient. Requires a and b to have sorted y_proba_indices (in ascending order). """ - result = np.zeros(m, dtype=FLOAT_TYPE) + result = np.zeros(m, dtype=a_data.dtype) for i in range(n): a_start, a_end = a_indptr[i], a_indptr[i + 1] b_start, b_end = b_indptr[i], b_indptr[i + 1] @@ -236,7 +228,7 @@ def numba_calculate_prod_0_csr_mat_mul_ones_minus_mat( Gives the same result as a.multiply(ones - b).prod(axis=0) where a and b are sparse matrices but makes it more efficient. Requires a and b to have sorted y_proba_indices (in ascending order). """ - result = np.ones(m, dtype=FLOAT_TYPE) + result = np.ones(m, dtype=a_data.dtype) for i in range(n): a_start, a_end = a_indptr[i], a_indptr[i + 1] b_start, b_end = b_indptr[i], b_indptr[i + 1] @@ -331,7 +323,7 @@ def numba_argtopk_csr(y_proba_data, y_proba_indices, k) -> np.ndarray: @njit(cache=True) def numba_resize(arr, new_size, fill) -> np.ndarray: - new_arr = np.zeros(new_size, arr.dtype) + new_arr = np.zeros(new_size, dtype=arr.dtype) new_arr[: arr.size] = arr if fill != 0: new_arr[arr.size :] = fill @@ -349,7 +341,7 @@ def numba_set_gains_csr( k, th, is_insert=True, -) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: +) -> CSRMatrixAsTuple: """ Sets top k gains or gains > th for the i-th instance in the y_pred csr matrix. 
""" @@ -412,7 +404,7 @@ def numba_predict_weighted_per_instance_csr_step( a: Optional[np.ndarray] = None, b: Optional[np.ndarray] = None, is_insert=True, -) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: +) -> CSRMatrixAsTuple: gains_data = y_proba_data[y_proba_indptr[i] : y_proba_indptr[i + 1]] gains_indices = y_proba_indices[y_proba_indptr[i] : y_proba_indptr[i + 1]] @@ -444,12 +436,12 @@ def numba_predict_weighted_per_instance_csr( th: float = 0, a: Optional[np.ndarray] = None, b: Optional[np.ndarray] = None, -) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: +) -> CSRMatrixAsTuple: n = y_proba_indptr.size - 1 initial_row_size = k if k > 0 else 10 - y_pred_data = np.ones(n * initial_row_size, dtype=FLOAT_TYPE) - y_pred_indices = np.zeros(n * initial_row_size, dtype=IND_TYPE) - y_pred_indptr = np.arange(n + 1, dtype=IND_TYPE) * initial_row_size + y_pred_data = np.ones(n * initial_row_size, dtype=y_proba_data.dtype) + y_pred_indices = np.zeros(n * initial_row_size, dtype=y_proba_indices.dtype) + y_pred_indptr = np.arange(n + 1, dtype=y_proba_indptr.dtype) * initial_row_size # This can be done in parallel, but Numba parallelism seems to not work well here for i in range(n): @@ -474,7 +466,6 @@ def numba_predict_weighted_per_instance_csr( y_pred_indices = y_pred_indices[: y_pred_indptr[-1]] y_pred_data = y_pred_data[: y_pred_indptr[-1]] - # y_pred_data = np.ones(y_pred_indices.size, dtype=FLOAT_TYPE) return y_pred_data, y_pred_indices, y_pred_indptr @@ -488,13 +479,13 @@ def numba_predict_macro_balanced_accuracy( m: int, k: int, marginals: np.ndarray, -) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: +) -> CSRMatrixAsTuple: """ Predicts k labels for each instance according to the optimal strategy for macro-balanced accuracy. """ - y_pred_data = np.ones(n * k, dtype=FLOAT_TYPE) - y_pred_indices = np.zeros(n * k, dtype=IND_TYPE) - y_pred_indptr = np.zeros(n + 1, dtype=IND_TYPE) + y_pred_data = np.ones(n * k, dtype=y_proba_data.dtype) + y_pred_indices = np.zeros(n * k, dtype=y_proba_indices.dtype) + y_pred_indptr = np.zeros(n + 1, dtype=y_proba_indptr.dtype) for i in range(n): row_data = y_proba_data[y_proba_indptr[i] : y_proba_indptr[i + 1]] diff --git a/xcolumns/types.py b/xcolumns/types.py index 287c8dc..3166543 100644 --- a/xcolumns/types.py +++ b/xcolumns/types.py @@ -1,24 +1,25 @@ +from typing import Tuple, Union + import numpy as np from scipy.sparse import csr_matrix -from typing import Union + DType = np.dtype Number = Union[int, float, np.number] DenseMatrix = np.ndarray Matrix = Union[np.ndarray, csr_matrix] +CSRMatrixAsTuple = Tuple[np.ndarray, np.ndarray, np.ndarray] +DefaultIndDType = np.int32 +DefaultDataDType = np.float32 + # Add torch.Tensor and torch.dtype to matrix types if torch is available try: import torch + DTypes = Union[DType, torch.dtype] DenseMatrix = Union[DenseMatrix, torch.Tensor] Matrix = Union[Matrix, torch.Tensor] TORCH_FLOAT_TYPE = torch.float32 except ImportError: pass - -# TODO: remove these hardcoded types -IND_TYPE = np.int32 # type of indices in CSR matrix -INT_TYPE = np.int32 -FLOAT_TYPE = np.float32 - diff --git a/xcolumns/utils.py b/xcolumns/utils.py index b8871a0..0cc69f9 100644 --- a/xcolumns/utils.py +++ b/xcolumns/utils.py @@ -1,3 +1,4 @@ +import logging from typing import Callable, List, Tuple, Union import numpy as np @@ -7,6 +8,35 @@ from .types import * +######################################################################################## +# Logger +######################################################################################## + + 
+logger = logging.getLogger("xcolumns") + + +def log(msg: str, verbose: bool, level: int = logging.INFO): + if verbose: + logger.log(level, msg) + + +def log_info(msg: str, verbose: bool): + log(msg, verbose, level=logging.INFO) + + +def log_debug(msg: str, verbose: bool): + log(msg, verbose, level=logging.DEBUG) + + +def log_warning(msg: str, verbose: bool): + log(msg, verbose, level=logging.WARNING) + + +def log_error(msg: str, verbose: bool): + log(msg, verbose, level=logging.ERROR) + + ######################################################################################## # Functions for generating matrices with random prediction at k ######################################################################################## @@ -70,7 +100,7 @@ def construct_csr_matrix( ######################################################################################## -def lin_search( +def uniform_search( low: float, high: float, step: float, func: Callable ) -> Tuple[float, float]: best = low @@ -83,23 +113,6 @@ def lin_search( return best, best_val -def bin_search( - low: float, high: float, eps: float, func: Callable -) -> Tuple[float, float]: - while high - low > eps: - mid = (low + high) / 2 - mid_next = (mid + high) / 2 - - if func(mid) < func(mid_next): - high = mid_next - else: - low = mid - - best = (low + high) / 2 - best_val = func(best) - return best, best_val - - def ternary_search( low: float, high: float, eps: float, func: Callable ) -> Tuple[float, float]: diff --git a/xcolumns/weighted_prediction.py b/xcolumns/weighted_prediction.py index 26c8736..c823808 100644 --- a/xcolumns/weighted_prediction.py +++ b/xcolumns/weighted_prediction.py @@ -1,5 +1,5 @@ from time import time -from typing import Optional, Union +from typing import Optional, Tuple, Union import numpy as np from scipy.sparse import csr_matrix @@ -85,6 +85,11 @@ def _predict_weighted_per_instance_csr( b: Optional[np.ndarray] = None, dtype: Optional[np.dtype] = None, ) -> csr_matrix: + if a is not None and a.dtype != y_proba.dtype: + a = a.astype(y_proba.dtype) + if b is not None and b.dtype != y_proba.dtype: + b = b.astype(y_proba.dtype) + n, m = y_proba.shape ( y_pred_data, @@ -93,7 +98,9 @@ def _predict_weighted_per_instance_csr( ) = numba_predict_weighted_per_instance_csr( *unpack_csr_matrix(y_proba), k, th, a, b ) - return csr_matrix((y_pred_data, y_pred_indices, y_pred_indptr), shape=(n, m)) + return csr_matrix( + (y_pred_data, y_pred_indices, y_pred_indptr), shape=(n, m), dtype=dtype + ) def predict_weighted_per_instance( @@ -104,7 +111,7 @@ def predict_weighted_per_instance( b: Optional[DenseMatrix] = None, dtype: Optional[DType] = None, return_meta: bool = False, -) -> Matrix: +) -> Union[Matrix, Tuple[Matrix, dict]]: """ Predict ... 
TODO: Add description """ @@ -176,7 +183,7 @@ def predict_top_k( y_proba: Matrix, k: int, return_meta: bool = False, -): +) -> Union[Matrix, Tuple[Matrix, dict]]: return predict_weighted_per_instance(y_proba, k=k, return_meta=return_meta) @@ -187,8 +194,7 @@ def predict_for_optimal_macro_recall( # (for population) priors: DenseMatrix, epsilon: float = 1e-6, return_meta: bool = False, - **kwargs, -): +) -> Union[Matrix, Tuple[Matrix, dict]]: if priors.shape[0] != y_proba.shape[1]: raise ValueError("priors must be of shape (y_proba[1],)") @@ -203,8 +209,7 @@ def predict_inv_propensity_weighted_instance( k: int, inv_ps: DenseMatrix, return_meta: bool = False, - **kwargs, -): +) -> Union[Matrix, Tuple[Matrix, dict]]: if inv_ps.shape[0] != y_proba.shape[1]: raise ValueError("inv_ps must be of shape (y_proba[1],)") @@ -219,8 +224,7 @@ def predict_log_weighted_per_instance( priors: DenseMatrix, epsilon: float = 1e-6, return_meta: bool = False, - **kwargs, -): +) -> Union[Matrix, Tuple[Matrix, dict]]: if priors.shape[0] != y_proba.shape[1]: raise ValueError("priors must be of shape (y_proba[1],)") @@ -236,8 +240,7 @@ def predict_sqrt_weighted_instance( priors: DenseMatrix, epsilon: float = 1e-6, return_meta: bool = False, - **kwargs, -): +) -> Union[Matrix, Tuple[Matrix, dict]]: if priors.shape[0] != y_proba.shape[1]: raise ValueError("priors must be of shape (y_proba[1],)") @@ -254,8 +257,7 @@ def predict_power_law_weighted_instance( beta: float, epsilon: float = 1e-6, return_meta: bool = False, - **kwargs, -): +) -> Union[Matrix, Tuple[Matrix, dict]]: if priors.shape[0] != y_proba.shape[1]: raise ValueError("priors must be of shape (y_proba[1],)") @@ -269,8 +271,7 @@ def predict_for_optimal_instance_precision( y_proba: Matrix, k: int, return_meta: bool = False, - **kwargs, -): +) -> Union[Matrix, Tuple[Matrix, dict]]: return predict_top_k(y_proba, k=k, return_meta=return_meta) @@ -279,7 +280,7 @@ def _predict_for_optimal_macro_balanced_accuracy_np( k: int, priors: DenseMatrix, epsilon: float = 1e-6, -): +) -> Matrix: n, m = y_proba.shape assert priors.shape == (m,) priors = priors + epsilon @@ -299,7 +300,7 @@ def _predict_for_optimal_macro_balanced_accuracy_csr( k: int, priors: DenseMatrix, epsilon: float = 1e-6, -): +) -> Matrix: n, m = y_proba.shape assert priors.shape == (m,) priors = priors + epsilon @@ -320,8 +321,7 @@ def predict_for_optimal_macro_balanced_accuracy( # (for population) priors: DenseMatrix, epsilon: float = 1e-6, return_meta: bool = False, - **kwargs, -): +) -> Union[Matrix, Tuple[Matrix, dict]]: if priors.shape[0] != y_proba.shape[1]: raise ValueError("priors must be of shape (y_proba[1],)")
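
Usage sketch (appendix, not part of the patch): a minimal example of the 0.0.2 API surface touched above — the renamed metric_func argument, the removed grad_func keyword (the gradient backend now follows the input type), the "uniform" alpha search (formerly "lin"), and predict_weighted_per_instance returning a (y_pred, meta) pair when return_meta=True. The import path of macro_fmeasure_on_conf_matrix and the data sizes are assumptions for illustration, not taken from the diff.

import numpy as np

from xcolumns.frank_wolfe import find_optimal_randomized_classifier_using_frank_wolfe
from xcolumns.metrics import macro_fmeasure_on_conf_matrix  # assumed module path
from xcolumns.weighted_prediction import predict_weighted_per_instance

rng = np.random.default_rng(2024)
n, m, k = 1000, 50, 5  # illustrative sizes, not the test configuration
y_true = (rng.random((n, m)) < 0.1).astype(np.float32)
y_proba = rng.random((n, m)).astype(np.float32)

# Weighted per-instance prediction; with return_meta=True it returns (y_pred, meta).
a = rng.random(m).astype(np.float32)
b = np.zeros(m, dtype=np.float32)
y_pred, meta = predict_weighted_per_instance(y_proba, k, a=a, b=b, return_meta=True)
assert (y_pred.sum(axis=1) == k).all()
print(f"weighted prediction took {meta['time']}s")

# Frank-Wolfe search for a randomized classifier maximizing macro F-measure.
rnd_clf, fw_meta = find_optimal_randomized_classifier_using_frank_wolfe(
    y_true,
    y_proba,
    macro_fmeasure_on_conf_matrix,
    k,
    alpha_search_algo="uniform",  # renamed from "lin"
    return_meta=True,
    seed=2024,
    verbose=True,
)
y_pred_fw = rnd_clf.predict(y_proba, seed=2024)
print(f"Frank-Wolfe finished in {fw_meta['time']}s after {fw_meta['iters']} iterations")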