Add support for torch.tensors to Frank Wolfe algorithm, improve dtype handling
mwydmuch committed Mar 5, 2024
1 parent e23c508 commit 8ae4ff0
Showing 11 changed files with 464 additions and 261 deletions.
19 changes: 16 additions & 3 deletions tests/conftest.py
@@ -11,10 +11,11 @@
@pytest.fixture(autouse=True, scope="session")
def generated_test_data():
seed = 2024
max_top_k = 5
x, y = make_multilabel_classification(
n_samples=100000,
n_features=100,
n_classes=20,
n_classes=50,
n_labels=3,
length=50,
allow_unlabeled=True,
@@ -31,12 +32,24 @@ def generated_test_data():
)
clf = MultiOutputClassifier(LogisticRegression()).fit(x_train, y_train)

def clf_predict(clf, x):
def clf_predict(clf, x, sparsfy=False):
"""
Process the output of a multioutput classifier to get the marginal probabilities of each label (class).
"""
y_proba = clf.predict_proba(x)
return np.array(y_proba)[:, :, 1].transpose()

# Convert the output of MultiOutputClassifier that contains the marginal probabilities for both P(y=0) and P(y=1) to just a matrix of P(y=1)
y_proba = np.array(y_proba)[:, :, 1].transpose()

# Sparsify the matrix
if sparsfy:
top_k_thr = -np.partition(-y_proba, max_top_k, axis=1)[:, max_top_k]
top_k_thr = top_k_thr.reshape((-1,))
top_k_thr[top_k_thr >= 0.1] = 0.1
y_proba[y_proba < top_k_thr[:, None]] = 0
assert ((y_proba > 0).sum(axis=1) >= max_top_k).all()

return y_proba

y_proba_train = clf_predict(clf, x_train)
y_proba_val = clf_predict(clf, x_val)
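
The sparsification added here keeps, per row, only the probabilities at or above the row's (max_top_k + 1)-th largest value, capping that threshold at 0.1 so that at least max_top_k entries always survive. The same idea as a standalone sketch (names are illustrative; it assumes more columns than max_top_k and strictly positive top probabilities):

import numpy as np

def sparsify_rows(y_proba: np.ndarray, max_top_k: int = 5) -> np.ndarray:
    """Zero out small probabilities while keeping at least max_top_k entries per row."""
    y_proba = y_proba.copy()
    # Threshold = the (max_top_k + 1)-th largest value in each row
    # (index max_top_k after a descending partition).
    thr = -np.partition(-y_proba, max_top_k, axis=1)[:, max_top_k]
    # Cap the threshold so rows with many large probabilities are not over-pruned.
    thr = np.minimum(thr, 0.1)
    y_proba[y_proba < thr[:, None]] = 0
    # Every row still has at least max_top_k nonzero entries.
    assert ((y_proba > 0).sum(axis=1) >= max_top_k).all()
    return y_proba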
17 changes: 13 additions & 4 deletions tests/test_frank_wolfe.py
@@ -9,16 +9,22 @@


def _run_frank_wolfe(y_val, y_proba_val, y_test, y_proba_test, k, init_a, init_b):
print(f"input dtype={y_proba_val.dtype}")
if isinstance(y_proba_val, csr_matrix):
print(
f" csr_matrix nnz={y_proba_val.nnz}, shape={y_proba_val.shape}, sparsity={y_proba_val.nnz / y_proba_val.shape[0] / y_proba_val.shape[1]}"
)
rnd_clf, meta = find_optimal_randomized_classifier_using_frank_wolfe(
y_val,
y_proba_val,
macro_fmeasure_on_conf_matrix,
k,
grad_func="torch",
return_meta=True,
seed=2024,
init_classifier=(init_a, init_b),
verbose=True,
)
print(f" time={meta['time']}s")

y_pred = rnd_clf.predict(y_proba_test, seed=2024)
assert type(y_pred) == type(y_proba_test)
@@ -84,6 +90,9 @@ def test_frank_wolfe(generated_test_data):
assert np_C == csr_C == torch_C

# Compare with top_k
assert macro_fmeasure_on_conf_matrix(
np_C.tp, np_C.fp, np_C.fn, np_C.tn
) > macro_fmeasure_on_conf_matrix(top_k_C.tp, top_k_C.fp, top_k_C.fn, top_k_C.tn)
fw_score = macro_fmeasure_on_conf_matrix(np_C.tp, np_C.fp, np_C.fn, np_C.tn)
top_k_score = macro_fmeasure_on_conf_matrix(
top_k_C.tp, top_k_C.fp, top_k_C.fn, top_k_C.tn
)
print(f"fw_score={fw_score}, top_k_score={top_k_score}")
assert fw_score >= top_k_score
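
For reference, the torch path exercised by this test boils down to calling the optimizer directly on torch tensors. A rough usage sketch; the module paths are assumptions based on the test context rather than taken from this diff, and init_classifier is omitted here:

import torch

# Assumed module paths; the test file's import lines are not shown in this diff.
from xcolumns.frank_wolfe import find_optimal_randomized_classifier_using_frank_wolfe
from xcolumns.metrics import macro_fmeasure_on_conf_matrix


def fw_on_torch(y_val: torch.Tensor, y_proba_val: torch.Tensor,
                y_proba_test: torch.Tensor, k: int = 3):
    """Optimize a randomized classifier for macro F-measure on torch inputs."""
    rnd_clf, meta = find_optimal_randomized_classifier_using_frank_wolfe(
        y_val,                          # true labels (torch.Tensor)
        y_proba_val,                    # marginal probabilities (torch.Tensor)
        macro_fmeasure_on_conf_matrix,  # metric defined on confusion-matrix entries
        k,                              # predict k labels per instance
        grad_func="torch",              # gradients of the metric computed via torch
        return_meta=True,
        seed=2024,
    )
    print(f"optimization time: {meta['time']}s")

    # Prediction keeps the container type of the input, so torch in -> torch out.
    return rnd_clf.predict(y_proba_test, seed=2024)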
36 changes: 28 additions & 8 deletions tests/test_weighted_prediction.py
@@ -6,12 +6,23 @@
from xcolumns.weighted_prediction import predict_weighted_per_instance


def _run_weighted_prediction(y_true, y_proba, k, a, b):
y_pred = predict_weighted_per_instance(y_proba, k, a=a, b=b)
def _run_and_test_weighted_prediction(y_true, y_proba, k, a, b):
print(f"input dtype={y_proba.dtype}")
if isinstance(y_proba, csr_matrix):
print(
f" csr_matrix nnz={y_proba.nnz}, shape={y_proba.shape}, sparsity={y_proba.nnz / y_proba.shape[0] / y_proba.shape[1]}"
)
y_pred, meta = predict_weighted_per_instance(y_proba, k, a=a, b=b, return_meta=True)
print(f" time={meta['time']}s")

assert type(y_pred) == type(y_proba)
assert y_pred.dtype == y_proba.dtype
assert (y_pred.sum(axis=1) == k).all()

return calculate_confusion_matrix(y_true, y_pred, normalize=False, skip_tn=False)
return (
calculate_confusion_matrix(y_true, y_pred, normalize=False, skip_tn=False),
y_pred,
)


def test_weighted_prediction(generated_test_data):
@@ -30,18 +41,27 @@ def test_weighted_prediction(generated_test_data):

# Generate random weights
a = np.random.rand(y_proba_train.shape[1])
b = np.random.rand(y_proba_train.shape[1])
# b = np.random.rand(y_proba_train.shape[1])
b = np.zeros(y_proba_train.shape[1])

# Run numpy implementation
np_C = _run_weighted_prediction(y_test, y_proba_test, k, a, b)
y_proba_test = y_proba_test.astype(np.float64)
np_C_64, np_pred_64 = _run_and_test_weighted_prediction(
y_test, y_proba_test, k, a, b
)

y_proba_test = y_proba_test.astype(np.float32)
np_C_32, np_pred_32 = _run_and_test_weighted_prediction(
y_test, y_proba_test, k, a, b
)

# Run csr_matrix implementation
csr_C = _run_weighted_prediction(
csr_C, csr_pred = _run_and_test_weighted_prediction(
csr_matrix(y_test), csr_matrix(y_proba_test), k, a, b
)

# Run torch implementation
torch_C = _run_weighted_prediction(
torch_C, torch_pred = _run_and_test_weighted_prediction(
torch.tensor(y_test),
torch.tensor(y_proba_test),
k,
@@ -56,4 +76,4 @@
torch_C.tn = torch_C.tn.numpy()

# Compare whether all implementations are equal
assert np_C == csr_C == torch_C
assert np_C_64 == np_C_32 == csr_C == torch_C
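
The dtype handling this test now checks can be summarized in a small self-contained driver: whatever container and dtype the marginal probabilities come in, the prediction is expected to come back in the same one. A minimal sketch under that assumption (synthetic data; the diff does not show whether torch weights are required for torch inputs, so they are converted here to be safe):

import numpy as np
import torch
from scipy.sparse import csr_matrix

from xcolumns.weighted_prediction import predict_weighted_per_instance

rng = np.random.default_rng(2024)
y_proba = rng.random((8, 50))   # marginal probabilities P(y_j = 1 | x_i)
a = rng.random(50)              # per-label linear weights
b = np.zeros(50)                # per-label offsets
k = 3

cases = [
    (y_proba.astype(np.float64), a, b),
    (y_proba.astype(np.float32), a, b),
    (csr_matrix(y_proba), a, b),
    (torch.tensor(y_proba), torch.tensor(a), torch.tensor(b)),
]

for y, aa, bb in cases:
    y_pred = predict_weighted_per_instance(y, k, a=aa, b=bb)
    # The prediction is expected to keep the container type and dtype of the
    # input probabilities and to select exactly k labels per instance.
    assert type(y_pred) == type(y)
    assert y_pred.dtype == y.dtype
    assert (y_pred.sum(axis=1) == k).all()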
2 changes: 1 addition & 1 deletion xcolumns/__init__.py
@@ -1 +1 @@
__version__ = "0.0.1"
__version__ = "0.0.2"
