From 6507587574279be257a8cd8f7649d828ef323bcb Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Mon, 16 Dec 2024 15:21:03 +0100 Subject: [PATCH 01/26] add test for clsuter and a method for getting groups --- hidimstat/scenario.py | 30 +++++- hidimstat/test/test_clustered_inference.py | 119 +++++++++++++++++++-- 2 files changed, 134 insertions(+), 15 deletions(-) diff --git a/hidimstat/scenario.py b/hidimstat/scenario.py index 9b8d4a5c..113c875a 100644 --- a/hidimstat/scenario.py +++ b/hidimstat/scenario.py @@ -16,6 +16,8 @@ def multivariate_1D_simulation( rho=0.0, shuffle=True, seed=0, + nb_group=0, + size_group=0, ): """Generate 1D data with Toeplitz design matrix @@ -41,11 +43,18 @@ def multivariate_1D_simulation( seed : int Seed used for generating design matrix and noise. + + nb_group : int + Number of groups. + + size_group : int + Size of each group. Returns ------- X : ndarray, shape (n_samples, n_features) Design matrix. + if their is some groups, the first rows contains the groups values. y : ndarray, shape (n_samples,) Target. @@ -58,17 +67,30 @@ def multivariate_1D_simulation( """ rng = np.random.default_rng(seed) - - X = np.zeros((n_samples, n_features)) - X[:, 0] = rng.standard_normal(n_samples) + if nb_group < 0 or size_group < 0: + raise ValueError( + "The number of groups and their size must be positive." + ) + n_generate_samples = n_samples - nb_group * size_group + if n_generate_samples <= 0: + raise ValueError( + "The number of samples is too small compate to the number " + "of group and their size to gerate the data." + ) + X = np.zeros((n_generate_samples, n_features)) + X[:, 0] = rng.standard_normal(n_generate_samples) for i in np.arange(1, n_features): - rand_vector = ((1 - rho**2) ** 0.5) * rng.standard_normal(n_samples) + rand_vector = ((1 - rho**2) ** 0.5) * rng.standard_normal(n_generate_samples) X[:, i] = rho * X[:, i - 1] + rand_vector if shuffle: rng.shuffle(X.T) + if nb_group > 0: + groups = np.repeat(X[:nb_group], size_group, axis=0) + X = np.vstack((groups, X)) + beta = np.zeros(n_features) beta[0:support_size] = 1.0 diff --git a/hidimstat/test/test_clustered_inference.py b/hidimstat/test/test_clustered_inference.py index 94e04ef0..13fdcd42 100644 --- a/hidimstat/test/test_clustered_inference.py +++ b/hidimstat/test/test_clustered_inference.py @@ -1,7 +1,7 @@ """ Test the clustered_inference module """ - +import pytest import numpy as np from numpy.testing import assert_almost_equal from sklearn.cluster import FeatureAgglomeration @@ -14,16 +14,17 @@ ) -def test_clustered_inference(): - """Testing the procedure on two simulations with a 1D data structure and - with n << p: the first test has no temporal dimension, the second has a - temporal dimension. The support is connected and of size 10, it must be - recovered with a small spatial tolerance parametrized by `margin_size`. +# Scenario 1: data with no temporal dimension +def test_clustered_inference_no_temporal(): + """ + Testing the procedure on one simulations with a 1D data structure and + with n << p: no temporal dimension. The support is connected and of + size 10, it must be recovered with a small spatial tolerance + parametrized by `margin_size`. Computing one sided p-values, we want low p-values for the features of - the support and p-values close to 0.5 for the others.""" + the support and p-values close to 0.5 for the others. 
+ """ - # Scenario 1: data with no temporal dimension - # ########################################### n_samples, n_features = 100, 2000 support_size = 10 sigma = 5.0 @@ -63,8 +64,17 @@ def test_clustered_inference(): pval_corr[extended_support:200], expected[extended_support:200], decimal=1 ) - # Scenario 2: temporal data - # ######################### + +# Scenario 2: temporal data +def test_clustered_inference_temporal(): + """ + Testing the procedure on two simulations with a 1D data structure and + with n << p: with a temporal dimension. The support is connected and + of size 10, it must be recovered with a small spatial tolerance + parametrized by `margin_size`. + Computing one sided p-values, we want low p-values for the features of + the support and p-values close to 0.5 for the others. + """ n_samples, n_features, n_times = 200, 2000, 10 support_size = 10 sigma = 5.0 @@ -104,3 +114,90 @@ def test_clustered_inference(): assert_almost_equal( pval_corr[extended_support:], expected[extended_support:], decimal=1 ) + + +# Scenario 3: data with no temporal dimension and with groups +def test_clustered_inference_no_temporal_groups(): + """ + Testing the procedure on one simulations with a 1D data structure and + with n << p: no temporal dimension. The support is connected and of + size 10, it must be recovered with a small spatial tolerance + parametrized by `margin_size`. + We group the sample in 10 groups of size 10. + Computing one sided p-values, we want low p-values for the features of + the support and p-values close to 0.5 for the others. + """ + + n_samples, n_features = 100, 2000 + support_size = 10 + nb_groups = 9 + size = 10 + sigma = 5.0 + rho = 0.95 + n_clusters = 200 + margin_size = 5 + interior_support = support_size - margin_size + extended_support = support_size + margin_size + + X_init, y, beta, epsilon = multivariate_1D_simulation( + n_samples=n_samples, + n_features=n_features, + support_size=support_size, + sigma=sigma, + rho=rho, + shuffle=False, + seed=2, + nb_group=nb_groups, + size_group=size, + ) + groups = np.concatenate([[i]*size for i in range(nb_groups+1)]) + + y = y - np.mean(y) + X_init = X_init - np.mean(X_init, axis=0) + + connectivity = image.grid_to_graph(n_x=n_features, n_y=1, n_z=1) + ward = FeatureAgglomeration( + n_clusters=n_clusters, connectivity=connectivity, linkage="ward" + ) + + beta_hat, pval, pval_corr, one_minus_pval, one_minus_pval_corr = ( + clustered_inference(X_init, y, ward, n_clusters, groups=groups) + ) + + expected = 0.5 * np.ones(n_features) + expected[:support_size] = 0.0 + + assert_almost_equal(pval_corr[:interior_support], expected[:interior_support]) + assert_almost_equal( + pval_corr[extended_support:200], expected[extended_support:200], decimal=1 + ) + + + +# Scenario 1: data with no temporal dimension +def test_clustered_inference_exception_methods(): + """ + Testing the procedure on two simulations with a 1D data structure and + checking that the procedure raises an exception when an unknown method is + provided. 
+ """ + n_samples, n_features = 100, 2000 + n_clusters = 200 + + X_init, y, beta, epsilon = multivariate_1D_simulation( + n_samples=n_samples, + n_features=n_features, + shuffle=False, + seed=2, + ) + + y = y - np.mean(y) + X_init = X_init - np.mean(X_init, axis=0) + + connectivity = image.grid_to_graph(n_x=n_features, n_y=1, n_z=1) + ward = FeatureAgglomeration( + n_clusters=n_clusters, connectivity=connectivity, linkage="ward" + ) + + with pytest.raises(ValueError, match="Unknow method"): + clustered_inference(X_init, y, ward, n_clusters, method="lll") \ No newline at end of file From 2233a5f84d448ee88e2ad0e838c75b4aaf1ddc43 Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Mon, 16 Dec 2024 15:21:31 +0100 Subject: [PATCH 02/26] Add test for dcrt --- hidimstat/test/test_dcrt.py | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/hidimstat/test/test_dcrt.py b/hidimstat/test/test_dcrt.py index da2085cb..ac2b2061 100644 --- a/hidimstat/test/test_dcrt.py +++ b/hidimstat/test/test_dcrt.py @@ -15,7 +15,7 @@ def test_dcrt_lasso(): """ X, y = make_regression(n_samples=100, n_features=10, noise=0.2, random_state=2024) # Checking if a loss != 'least_square' - with pytest.raises(Exception): + with pytest.raises(ValueError, match="test loss is not supported."): _ = dcrt_zero( X, y, @@ -27,7 +27,7 @@ def test_dcrt_lasso(): ) # Checking for a different statistic - with pytest.raises(Exception): + with pytest.raises(ValueError, match="test statistic is not supported."): _ = dcrt_zero( X, y, @@ -36,6 +36,30 @@ def test_dcrt_lasso(): statistic="test", random_state=2024, ) + + # Checking for bad selection of screening_threshold + result_th_screen_bad = dcrt_zero( + X, + y, + screening_threshold = 0, + screening=True, + verbose=False, + ) + assert result_th_screen_bad.size == 0 + + # Checking for bad selection of screening_threshold with verbose + result_th_screen_bad = dcrt_zero( + X, + y, + screening_threshold = 0, + screening=True, + verbose=True, + ) + + assert len(result_th_screen_bad) == 3 + assert result_th_screen_bad[0].size == 0 + assert np.all(result_th_screen_bad[1] == np.ones(10)) + assert np.all(result_th_screen_bad[2] == np.zeros(10)) # Checking with and without screening results_no_screening = dcrt_zero( @@ -70,7 +94,8 @@ def test_dcrt_lasso(): X, y, refit=True, - screening=False, + screening=True, + screening_threshold = 50, verbose=True, statistic="residual", random_state=2024, From cb02e0c41fa47a6ae0287711e582be62494ed9f6 Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Mon, 16 Dec 2024 15:21:57 +0100 Subject: [PATCH 03/26] Add commit for desparsified functions and fixed one bug --- hidimstat/desparsified_lasso.py | 2 +- hidimstat/test/test_desparsified_lasso.py | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/hidimstat/desparsified_lasso.py b/hidimstat/desparsified_lasso.py index aaa5e03e..5081a47c 100644 --- a/hidimstat/desparsified_lasso.py +++ b/hidimstat/desparsified_lasso.py @@ -57,7 +57,7 @@ def _compute_residuals( else: - ValueError("The only regression method available is 'lasso'") + raise ValueError("The only regression method available is 'lasso'") clf.fit(X_new, y) z = y - clf.predict(X_new) diff --git a/hidimstat/test/test_desparsified_lasso.py b/hidimstat/test/test_desparsified_lasso.py index 713bd2c5..007c26cc 100644 --- a/hidimstat/test/test_desparsified_lasso.py +++ b/hidimstat/test/test_desparsified_lasso.py @@ -5,6 +5,7 @@ import numpy as np from numpy.testing import assert_almost_equal, 
assert_equal from scipy.linalg import toeplitz +import pytest from hidimstat.desparsified_lasso import desparsified_group_lasso, desparsified_lasso from hidimstat.scenario import ( @@ -48,6 +49,15 @@ def test_desparsified_lasso(): assert_equal(cb_max > beta, True) +def test_desparsified_lasso_exception(): + """Testing exception of not using lasso""" + + X, y, beta, noise = multivariate_1D_simulation( + ) + with pytest.raises(ValueError, match="The only regression method available is 'lasso'"): + _ = desparsified_lasso(X, y, residual_method='test') + + def test_desparsified_group_lasso(): """Testing the procedure on a simulation with no structure and a support of size 2. Computing one-sided p-values, we want From d36dc6818aa370f2b1edafa54e15230f5cfd34ac Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Mon, 16 Dec 2024 16:14:42 +0100 Subject: [PATCH 04/26] Add tests for ensemble clustered --- .../test/test_ensemble_clustered_inference.py | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/hidimstat/test/test_ensemble_clustered_inference.py b/hidimstat/test/test_ensemble_clustered_inference.py index 5129a229..39381108 100644 --- a/hidimstat/test/test_ensemble_clustered_inference.py +++ b/hidimstat/test/test_ensemble_clustered_inference.py @@ -3,6 +3,7 @@ """ import numpy as np +import pytest from numpy.testing import assert_almost_equal from sklearn.cluster import FeatureAgglomeration from sklearn.feature_extraction import image @@ -125,3 +126,63 @@ def test_ensemble_clustered_inference(): assert_almost_equal( pval_corr[extended_support:], expected[extended_support:], decimal=1 ) + + beta_hat, pval, pval_corr, one_minus_pval, one_minus_pval_corr = ( + ensemble_clustered_inference( + X, + Y, + ward, + n_clusters, + n_bootstraps=n_bootstraps, + inference_method=inference_method, + ensembling_method="medians" + ) + ) + + expected = 0.5 * np.ones(n_features) + expected[:support_size] = 0.0 + + assert_almost_equal( + pval_corr[:interior_support], expected[:interior_support], decimal=3 + ) + assert_almost_equal( + pval_corr[extended_support:], expected[extended_support:], decimal=1 + ) + + + + +def test_ensemble_clustered_inference_exception(): + """ + Test the raise of exception + """ + n_samples, n_features = 100, 2000 + n_clusters = 10 + X, Y, beta, epsilon = multivariate_1D_simulation( + n_samples=n_samples, n_features=n_features, + ) + connectivity = image.grid_to_graph(n_x=n_features, n_y=1, n_z=1) + ward = FeatureAgglomeration( + n_clusters=n_clusters, connectivity=connectivity, linkage="ward" + ) + + # Test the raise of exception + with pytest.raises(ValueError, match="Unknown ensembling method."): + ensemble_clustered_inference( + X, + Y, + ward, + n_clusters, + ensembling_method="wrong_method" + ) + + + with pytest.raises(ValueError, match="'memory' must be None or a string corresponding " + + "to the path of the caching directory."): + ensemble_clustered_inference( + X, + Y, + ward, + n_clusters, + memory=[] + ) \ No newline at end of file From 65282b56f8ae25f27e2b669b2be8ba3041f32afb Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Mon, 16 Dec 2024 16:44:29 +0100 Subject: [PATCH 05/26] Add test for the knockoff functions. 
--- hidimstat/test/test_generate_knockoff.py | 37 ++++++++++++++++++--- hidimstat/test/test_knockoff_aggregation.py | 23 ++++++++++--- hidimstat/test/test_model_x_knockoff.py | 15 +++++++++ 3 files changed, 65 insertions(+), 10 deletions(-) diff --git a/hidimstat/test/test_generate_knockoff.py b/hidimstat/test/test_generate_knockoff.py index 54056f79..44430ec7 100644 --- a/hidimstat/test/test_generate_knockoff.py +++ b/hidimstat/test/test_generate_knockoff.py @@ -6,12 +6,12 @@ _estimate_distribution, gaussian_knockoff_generation, ) +import pytest -SEED = 42 -fdr = 0.1 -def test_estimate_distribution(): +def test_estimate_distribution_wolf(): + SEED = 42 n = 100 p = 50 X, y, _, non_zero = simu_data(n, p, seed=SEED) @@ -20,18 +20,45 @@ def test_estimate_distribution(): assert mu.size == p assert Sigma.shape == (p, p) - mu, Sigma = _estimate_distribution(X, cov_estimator="graph_lasso") + +def test_estimate_distribution_lasso(): + SEED = 42 + n = 100 + p = 50 + X, y, _, non_zero = simu_data(n, p, seed=SEED) + mu, Sigma = _estimate_distribution(X, shrink=True, cov_estimator="graph_lasso") assert mu.size == p assert Sigma.shape == (p, p) +def test_gaussian_knockoff_estimate_exception(): + SEED = 42 + n = 100 + p = 50 + X, y, _, non_zero = simu_data(n, p, seed=SEED) + with pytest.raises(ValueError, match="test is not a valid covariance estimated method"): + _estimate_distribution(X, shrink=True, cov_estimator="test") + def test_gaussian_knockoff_equi(): + SEED = 42 n = 100 p = 50 X, y, _, non_zero = simu_data(n, p, seed=SEED) - mu, Sigma = _estimate_distribution(X, cov_estimator="ledoit_wolf") + mu, Sigma = _estimate_distribution(X, shrink=True, cov_estimator="ledoit_wolf") X_tilde = gaussian_knockoff_generation(X, mu, Sigma, method="equi", seed=SEED * 2) assert X_tilde.shape == (n, p) + + +def test_gaussian_knockoff_exception(): + SEED = 42 + n = 100 + p = 50 + X, y, _, non_zero = simu_data(n, p, seed=SEED) + mu, Sigma = _estimate_distribution(X, shrink=True, cov_estimator="ledoit_wolf") + + with pytest.raises(ValueError, match="test is not a valid knockoff contriction method"): + gaussian_knockoff_generation(X, mu, Sigma, method="test", seed=SEED * 2) + diff --git a/hidimstat/test/test_knockoff_aggregation.py b/hidimstat/test/test_knockoff_aggregation.py index a2984fc7..992ef6dd 100644 --- a/hidimstat/test/test_knockoff_aggregation.py +++ b/hidimstat/test/test_knockoff_aggregation.py @@ -54,6 +54,21 @@ def test_knockoff_aggregation(): assert fdp_verbose < 0.5 assert power_verbose > 0.1 + selected_ko, test_scored, thres, X_tilde = model_x_knockoff(X, y, fdr=fdr, seed=5, verbose=True) + #TODO add tests for the 3 other variables + + np.testing.assert_array_equal(selected_no_verbose, selected_ko) + + fdp_no_verbose, power_no_verbose = cal_fdp_power( + selected_no_verbose, non_zero_index + ) + fdp_verbose, power_verbose = cal_fdp_power(selected_verbose, non_zero_index) + + assert fdp_verbose < 0.5 + assert power_verbose > 0.1 + + + # Using e-values aggregation (verbose vs no verbose) selected_verbose, aggregated_pval, pvals = knockoff_aggregation( @@ -89,7 +104,7 @@ def test_knockoff_aggregation(): assert power_no_verbose > 0.1 # Checking wrong type for random_state - with pytest.raises(Exception): + with pytest.raises(TypeError, match="Wrong type for random_state"): _ = knockoff_aggregation( X, y, @@ -97,20 +112,18 @@ def test_knockoff_aggregation(): ) # Checking value for offset not belonging to (0,1) - with pytest.raises(Exception): + with pytest.raises(ValueError, match="'offset' must be either 
0 or 1"): _ = knockoff_aggregation( X, y, offset=2, method="quantile", - random_state="test", ) - with pytest.raises(Exception): + with pytest.raises(ValueError, match="'offset' must be either 0 or 1"): _ = knockoff_aggregation( X, y, offset=2, method="e-values", - random_state="test", ) diff --git a/hidimstat/test/test_model_x_knockoff.py b/hidimstat/test/test_model_x_knockoff.py index 504e4694..6caa3902 100644 --- a/hidimstat/test/test_model_x_knockoff.py +++ b/hidimstat/test/test_model_x_knockoff.py @@ -16,3 +16,18 @@ def test_model_x_knockoff(): assert fdp <= 0.2 assert power > 0.7 + + +def test_model_x_knockoff_with_verbose(): + n = 300 + p = 300 + X, y, _, non_zero = simu_data(n, p, seed=seed) + ko_result, test_scored, thres, X_tilde = model_x_knockoff(X, y, fdr=fdr, seed=5, verbose=True) + #TODO add tests for the 3 other variables + + fdp, power = cal_fdp_power(ko_result, non_zero) + + assert fdp <= 0.2 + assert power > 0.7 + + From 968500c85d12572c00aa6a257fb73b1d12bbc901 Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Mon, 16 Dec 2024 17:00:43 +0100 Subject: [PATCH 06/26] Add test for Reid methods --- hidimstat/test/test_noise_std.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/hidimstat/test/test_noise_std.py b/hidimstat/test/test_noise_std.py index cb890529..a5cd94b7 100644 --- a/hidimstat/test/test_noise_std.py +++ b/hidimstat/test/test_noise_std.py @@ -3,6 +3,7 @@ """ import numpy as np +import pytest from numpy.testing import assert_almost_equal from scipy.linalg import toeplitz @@ -129,6 +130,28 @@ def test_group_reid(): assert_almost_equal(np.log(np.min(error_ratio)), 0.0, decimal=1) +def test_group_reid_exception(): + """ + Test the exception of group reid + """ + n_samples = 30 + n_features = 50 + n_times = 10 + X, Y, beta, noise = multivariate_temporal_simulation( + n_samples=n_samples, + n_features=n_features, + n_times=n_times, + ) + # max_iter=1 to get a better coverage + with pytest.raises(ValueError, match= "The requested AR order is to high with " + + "respect to the number of time steps." + ): + group_reid(X, Y, stationary=True, method="AR", order = 100) + + with pytest.raises(ValueError, match= "Unknown method for estimating the covariance matrix"): + group_reid(X, Y, method="test", order = 100) + + def test_empirical_snr(): """Computing empirical signal to noise ratio from the target `y`, the data `X` and the true parameter vector `beta` in a simple From 05628cf17c2cb7a97e28a792662afe42214d444a Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Mon, 16 Dec 2024 17:04:16 +0100 Subject: [PATCH 07/26] Add test for permutation_test --- hidimstat/test/test_permutation_test.py | 30 +++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/hidimstat/test/test_permutation_test.py b/hidimstat/test/test_permutation_test.py index 6e1835db..c50162dc 100644 --- a/hidimstat/test/test_permutation_test.py +++ b/hidimstat/test/test_permutation_test.py @@ -38,3 +38,33 @@ def test_permutation_test(): expected[:support_size] = 0.0 assert_almost_equal(pval_corr, expected, decimal=1) + +def test_permutation_test_with_SVR(): + """Testing the procedure on a simulation with no structure and a support + of size 1. 
Computing one-sided p-values, we want a low p-value + for the first feature and p-values close to 0.5 for the others.""" + + n_samples, n_features = 20, 50 + support_size = 1 + sigma = 0.1 + rho = 0.0 + + X_init, y, beta, noise = multivariate_1D_simulation( + n_samples=n_samples, + n_features=n_features, + support_size=support_size, + sigma=sigma, + rho=rho, + shuffle=False, + seed=3, + ) + + y = y - np.mean(y) + X_init = X_init - np.mean(X_init, axis=0) + + pval_corr, one_minus_pval_corr = permutation_test_cv(X_init, y, n_permutations=100, C=0.1) + + expected = 0.5 * np.ones(n_features) + expected[:support_size] = 0.0 + + assert_almost_equal(pval_corr, expected, decimal=1) \ No newline at end of file From 744f2011d544d60751273e4712204c1cbd10c6a1 Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Mon, 16 Dec 2024 17:28:39 +0100 Subject: [PATCH 08/26] Add tests and fixe name of one variable --- hidimstat/scenario.py | 10 +++--- hidimstat/test/test_clustered_inference.py | 2 +- hidimstat/test/test_scenario.py | 38 ++++++++++++++++++++++ 3 files changed, 44 insertions(+), 6 deletions(-) diff --git a/hidimstat/scenario.py b/hidimstat/scenario.py index 113c875a..de0de0fe 100644 --- a/hidimstat/scenario.py +++ b/hidimstat/scenario.py @@ -17,7 +17,7 @@ def multivariate_1D_simulation( shuffle=True, seed=0, nb_group=0, - size_group=0, + group_size=0, ): """Generate 1D data with Toeplitz design matrix @@ -47,7 +47,7 @@ def multivariate_1D_simulation( nb_group : int Number of groups. - size_group : int + group_size : int Size of each group. Returns @@ -67,11 +67,11 @@ def multivariate_1D_simulation( """ rng = np.random.default_rng(seed) - if nb_group < 0 or size_group < 0: + if nb_group < 0 or group_size < 0: raise ValueError( "The number of groups and their size must be positive." 
) - n_generate_samples = n_samples - nb_group * size_group + n_generate_samples = n_samples - nb_group * group_size if n_generate_samples <= 0: raise ValueError( "The number of samples is too small compate to the number " @@ -88,7 +88,7 @@ def multivariate_1D_simulation( rng.shuffle(X.T) if nb_group > 0: - groups = np.repeat(X[:nb_group], size_group, axis=0) + groups = np.repeat(X[:nb_group], group_size, axis=0) X = np.vstack((groups, X)) beta = np.zeros(n_features) diff --git a/hidimstat/test/test_clustered_inference.py b/hidimstat/test/test_clustered_inference.py index 13fdcd42..cdecbacf 100644 --- a/hidimstat/test/test_clustered_inference.py +++ b/hidimstat/test/test_clustered_inference.py @@ -148,7 +148,7 @@ def test_clustered_inference_no_temporal_groups(): shuffle=False, seed=2, nb_group=nb_groups, - size_group=size, + group_size=size, ) groups = np.concatenate([[i]*size for i in range(nb_groups+1)]) diff --git a/hidimstat/test/test_scenario.py b/hidimstat/test/test_scenario.py index 503f6848..4441ffc3 100644 --- a/hidimstat/test/test_scenario.py +++ b/hidimstat/test/test_scenario.py @@ -3,6 +3,7 @@ """ import numpy as np +import pytest from numpy.testing import assert_almost_equal, assert_equal from hidimstat.scenario import ( @@ -55,6 +56,41 @@ def test_multivariate_1D_simulation(): X, y, beta, noise = multivariate_1D_simulation() rho_hat = np.corrcoef(X[:, 19], X[:, 20])[0, 1] assert_almost_equal(rho_hat, 0, decimal=1) + + # Test 3 + X, y, beta, noise = multivariate_1D_simulation(n_samples=9, nb_group=2, group_size=3) + corr_X = np.corrcoef(X) + assert_almost_equal(corr_X[0, 0], 1, decimal=1) + assert_almost_equal(corr_X[0, 1], 1, decimal=1) + assert_almost_equal(corr_X[0, 2], 1, decimal=1) + assert_almost_equal(corr_X[1, 0], 1, decimal=1) + assert_almost_equal(corr_X[1, 1], 1, decimal=1) + assert_almost_equal(corr_X[1, 2], 1, decimal=1) + assert_almost_equal(corr_X[2, 0], 1, decimal=1) + assert_almost_equal(corr_X[2, 1], 1, decimal=1) + assert_almost_equal(corr_X[2, 2], 1, decimal=1) + assert_almost_equal(corr_X[3, 3], 1, decimal=1) + assert_almost_equal(corr_X[3, 4], 1, decimal=1) + assert_almost_equal(corr_X[3, 5], 1, decimal=1) + assert_almost_equal(corr_X[4, 3], 1, decimal=1) + assert_almost_equal(corr_X[4, 4], 1, decimal=1) + assert_almost_equal(corr_X[4, 5], 1, decimal=1) + assert_almost_equal(corr_X[5, 3], 1, decimal=1) + assert_almost_equal(corr_X[5, 4], 1, decimal=1) + assert_almost_equal(corr_X[5, 5], 1, decimal=1) + + + +def test_multivariate_1D_simulation_exception(): + """ + Test when the input paramters is not correct. 
+ """ + with pytest.raises(ValueError, match="The number of groups and their size must be positive."): + multivariate_1D_simulation(nb_group=-1) + + with pytest.raises(ValueError, match="The number of samples is too small compate to the number " + "of group and their size to gerate the data."): + multivariate_1D_simulation(n_samples=10, nb_group=2, group_size=6) def test_multivariate_simulation(): @@ -162,3 +198,5 @@ def test_multivariate_temporal_simulation(): rho_data_hat = np.corrcoef(X[:, 19], X[:, 20])[0, 1] assert_almost_equal(rho_data_hat, rho_data, decimal=1) assert_equal(Y, np.dot(X, beta) + noise) + + From f3473c7b065187e103adb56a7fca08009f1dbbc1 Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Mon, 16 Dec 2024 18:11:30 +0100 Subject: [PATCH 09/26] Add test and change name of variables --- hidimstat/test/test_utils.py | 50 ++++++++++++++++++++++++++---------- hidimstat/utils.py | 14 +++++----- 2 files changed, 43 insertions(+), 21 deletions(-) diff --git a/hidimstat/test/test_utils.py b/hidimstat/test/test_utils.py index cc1c1190..24bbf74e 100644 --- a/hidimstat/test/test_utils.py +++ b/hidimstat/test/test_utils.py @@ -6,6 +6,33 @@ seed = 42 +def test_quantile_aggregation(): + """ + This function tests the application of the quantile aggregation method + """ + col = np.arange(11) + p_values = np.tile(col, (10, 1)).T / 100 + gamma_min = 0.05 + + assert_array_almost_equal(0.1 * quantile_aggregation(p_values, 0.1), [0.01] * 10) + assert_array_almost_equal(0.1 * quantile_aggregation(p_values, 0.1, adaptive=True, gamma_min=gamma_min) + / (1 - np.log(gamma_min)), [0.01] * 10) + assert_array_almost_equal(0.3 * quantile_aggregation(p_values, 0.3), [0.03] * 10) + assert_array_almost_equal(0.3 * quantile_aggregation(p_values, 0.3, adaptive=True, gamma_min=gamma_min) + / (1 - np.log(gamma_min)), [0.03] * 10) + assert_array_almost_equal(0.5 * quantile_aggregation(p_values, 0.5), [0.05] * 10) + assert_array_almost_equal(0.5 * quantile_aggregation(p_values, 0.5, adaptive=True, gamma_min=gamma_min) + / (1 - np.log(gamma_min)), [0.05] * 10) + + # One p-value within the quantile aggregation method + p_values = np.array([0.0]) + + assert quantile_aggregation(p_values) == 0.0 + assert quantile_aggregation(p_values, adaptive=True) == 0.0 + + + + def test_fdr_threshold(): """ This function tests the application of the False Discovery Rate (FDR) @@ -50,19 +77,14 @@ def test_cal_fdp_power(): assert fdp == 2 / len(selected) assert power == 18 / len(non_zero_index) + # test for zero selection + fdp, power = cal_fdp_power(np.array([]), non_zero_index) -def test_quantile_aggregation(): - """ - This function tests the application of the quantile aggregation method - """ - col = np.arange(11) - p_values = np.tile(col, (10, 1)).T / 100 - - assert_array_almost_equal(0.1 * quantile_aggregation(p_values, 0.1), [0.01] * 10) - assert_array_almost_equal(0.3 * quantile_aggregation(p_values, 0.3), [0.03] * 10) - assert_array_almost_equal(0.5 * quantile_aggregation(p_values, 0.5), [0.05] * 10) - - # One p-value within the quantile aggregation method - p_values = np.array([0.0]) + assert fdp == 0 + assert power == 0 - assert quantile_aggregation(p_values) == 0.0 + # test for shift in index + fdp, power = cal_fdp_power(selected+1, non_zero_index, r_index=True) + + assert fdp == 2 / len(selected) + assert power == 18 / len(non_zero_index) \ No newline at end of file diff --git a/hidimstat/utils.py b/hidimstat/utils.py index efdc9336..418ee85a 100644 --- a/hidimstat/utils.py +++ b/hidimstat/utils.py @@ -84,19 +84,19 @@ 
def _bhq_threshold(pvals, fdr=0.1): return -1.0 -def _ebh_threshold(evals, fdr=0.1): +def _ebh_threshold(pvals, fdr=0.1): """e-BH procedure for FDR control (see Wang and Ramdas 2021)""" - n_features = len(evals) - evals_sorted = -np.sort(-evals) # sort in descending order + n_features = len(pvals) + pvals_sorted = -np.sort(-pvals) # sort in descending order selected_index = 2 * n_features for i in range(n_features - 1, -1, -1): - if evals_sorted[i] >= n_features / (fdr * (i + 1)): + if pvals_sorted[i] >= n_features / (fdr * (i + 1)): selected_index = i break if selected_index <= n_features: - return evals_sorted[selected_index] + return pvals_sorted[selected_index] else: - return np.infty + return np.inf def _bhy_threshold(pvals, reshaping_function=None, fdr=0.1): @@ -147,7 +147,7 @@ def _fixed_quantile_aggregation(pvals, gamma=0.5): def _adaptive_quantile_aggregation(pvals, gamma_min=0.05): """adaptive version of the quantile aggregation method, Meinshausen et al. (2008)""" - gammas = np.arange(gamma_min, 1.05, 0.05) + gammas = np.linspace(gamma_min, 1., 30) list_Q = np.array([_fixed_quantile_aggregation(pvals, gamma) for gamma in gammas]) return np.minimum(1, (1 - np.log(gamma_min)) * list_Q.min(0)) From 9b872a83a994639dc56f884fe2088f413d61d9da Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Mon, 16 Dec 2024 18:15:33 +0100 Subject: [PATCH 10/26] Unecessary file --- hidimstat/setup.py | 14 -------------- 1 file changed, 14 deletions(-) delete mode 100644 hidimstat/setup.py diff --git a/hidimstat/setup.py b/hidimstat/setup.py deleted file mode 100644 index 15c6389d..00000000 --- a/hidimstat/setup.py +++ /dev/null @@ -1,14 +0,0 @@ -def configuration(parent_package="", top_path=None): - from numpy.distutils.misc_util import Configuration - - config = Configuration("plotting", parent_package, top_path) - - config.add_subpackage("tests") - - return config - - -if __name__ == "__main__": - from numpy.distutils.core import setup - - setup(**configuration(top_path="").todict()) From 4683e5880198f93d6b814c002cc158d3099a0fbb Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Wed, 18 Dec 2024 12:49:06 +0100 Subject: [PATCH 11/26] Fix typo --- hidimstat/scenario.py | 10 +++--- hidimstat/test/test_clustered_inference.py | 22 ++++++------- hidimstat/test/test_dcrt.py | 28 ++++++++-------- hidimstat/test/test_desparsified_lasso.py | 9 ++--- .../test/test_ensemble_clustered_inference.py | 33 +++++++------------ hidimstat/test/test_generate_knockoff.py | 11 ++++--- hidimstat/test/test_knockoff_aggregation.py | 8 ++--- hidimstat/test/test_loco.py | 1 - hidimstat/test/test_model_x_knockoff.py | 8 ++--- hidimstat/test/test_noise_std.py | 18 ++++++---- hidimstat/test/test_permutation_test.py | 7 ++-- hidimstat/test/test_scenario.py | 24 ++++++++------ hidimstat/test/test_utils.py | 32 +++++++++++------- hidimstat/utils.py | 2 +- 14 files changed, 113 insertions(+), 100 deletions(-) diff --git a/hidimstat/scenario.py b/hidimstat/scenario.py index de0de0fe..88130c9b 100644 --- a/hidimstat/scenario.py +++ b/hidimstat/scenario.py @@ -43,10 +43,10 @@ def multivariate_1D_simulation( seed : int Seed used for generating design matrix and noise. - + nb_group : int Number of groups. - + group_size : int Size of each group. @@ -68,10 +68,8 @@ def multivariate_1D_simulation( rng = np.random.default_rng(seed) if nb_group < 0 or group_size < 0: - raise ValueError( - "The number of groups and their size must be positive." 
- ) - n_generate_samples = n_samples - nb_group * group_size + raise ValueError("The number of groups and their size must be positive.") + n_generate_samples = n_samples - nb_group * group_size if n_generate_samples <= 0: raise ValueError( "The number of samples is too small compate to the number " diff --git a/hidimstat/test/test_clustered_inference.py b/hidimstat/test/test_clustered_inference.py index cdecbacf..490b5365 100644 --- a/hidimstat/test/test_clustered_inference.py +++ b/hidimstat/test/test_clustered_inference.py @@ -1,6 +1,7 @@ """ Test the clustered_inference module """ + import pytest import numpy as np from numpy.testing import assert_almost_equal @@ -18,8 +19,8 @@ def test_clustered_inference_no_temporal(): """ Testing the procedure on one simulations with a 1D data structure and - with n << p: no temporal dimension. The support is connected and of - size 10, it must be recovered with a small spatial tolerance + with n << p: no temporal dimension. The support is connected and of + size 10, it must be recovered with a small spatial tolerance parametrized by `margin_size`. Computing one sided p-values, we want low p-values for the features of the support and p-values close to 0.5 for the others. @@ -69,8 +70,8 @@ def test_clustered_inference_no_temporal(): def test_clustered_inference_temporal(): """ Testing the procedure on two simulations with a 1D data structure and - with n << p: with a temporal dimension. The support is connected and - of size 10, it must be recovered with a small spatial tolerance + with n << p: with a temporal dimension. The support is connected and + of size 10, it must be recovered with a small spatial tolerance parametrized by `margin_size`. Computing one sided p-values, we want low p-values for the features of the support and p-values close to 0.5 for the others. @@ -120,10 +121,10 @@ def test_clustered_inference_temporal(): def test_clustered_inference_no_temporal_groups(): """ Testing the procedure on one simulations with a 1D data structure and - with n << p: no temporal dimension. The support is connected and of - size 10, it must be recovered with a small spatial tolerance - parametrized by `margin_size`. - We group the sample in 10 groups of size 10. + with n << p: no temporal dimension. The support is connected and of + size 10, it must be recovered with a small spatial tolerance + parametrized by `margin_size`. + We group the sample in 10 groups of size 10. Computing one sided p-values, we want low p-values for the features of the support and p-values close to 0.5 for the others. 
""" @@ -150,7 +151,7 @@ def test_clustered_inference_no_temporal_groups(): nb_group=nb_groups, group_size=size, ) - groups = np.concatenate([[i]*size for i in range(nb_groups+1)]) + groups = np.concatenate([[i] * size for i in range(nb_groups + 1)]) y = y - np.mean(y) X_init = X_init - np.mean(X_init, axis=0) @@ -171,7 +172,6 @@ def test_clustered_inference_no_temporal_groups(): assert_almost_equal( pval_corr[extended_support:200], expected[extended_support:200], decimal=1 ) - # Scenario 1: data with no temporal dimension @@ -200,4 +200,4 @@ def test_clustered_inference_exception_methods(): ) with pytest.raises(ValueError, match="Unknow method"): - clustered_inference(X_init, y, ward, n_clusters, method="lll") \ No newline at end of file + clustered_inference(X_init, y, ward, n_clusters, method="lll") diff --git a/hidimstat/test/test_dcrt.py b/hidimstat/test/test_dcrt.py index ac2b2061..da5af8fd 100644 --- a/hidimstat/test/test_dcrt.py +++ b/hidimstat/test/test_dcrt.py @@ -36,26 +36,26 @@ def test_dcrt_lasso(): statistic="test", random_state=2024, ) - + # Checking for bad selection of screening_threshold result_th_screen_bad = dcrt_zero( - X, - y, - screening_threshold = 0, - screening=True, - verbose=False, - ) + X, + y, + screening_threshold=0, + screening=True, + verbose=False, + ) assert result_th_screen_bad.size == 0 # Checking for bad selection of screening_threshold with verbose result_th_screen_bad = dcrt_zero( - X, - y, - screening_threshold = 0, - screening=True, - verbose=True, + X, + y, + screening_threshold=0, + screening=True, + verbose=True, ) - + assert len(result_th_screen_bad) == 3 assert result_th_screen_bad[0].size == 0 assert np.all(result_th_screen_bad[1] == np.ones(10)) @@ -95,7 +95,7 @@ def test_dcrt_lasso(): y, refit=True, screening=True, - screening_threshold = 50, + screening_threshold=50, verbose=True, statistic="residual", random_state=2024, diff --git a/hidimstat/test/test_desparsified_lasso.py b/hidimstat/test/test_desparsified_lasso.py index 007c26cc..69e26dbe 100644 --- a/hidimstat/test/test_desparsified_lasso.py +++ b/hidimstat/test/test_desparsified_lasso.py @@ -52,10 +52,11 @@ def test_desparsified_lasso(): def test_desparsified_lasso_exception(): """Testing exception of not using lasso""" - X, y, beta, noise = multivariate_1D_simulation( - ) - with pytest.raises(ValueError, match="The only regression method available is 'lasso'"): - _ = desparsified_lasso(X, y, residual_method='test') + X, y, beta, noise = multivariate_1D_simulation() + with pytest.raises( + ValueError, match="The only regression method available is 'lasso'" + ): + _ = desparsified_lasso(X, y, residual_method="test") def test_desparsified_group_lasso(): diff --git a/hidimstat/test/test_ensemble_clustered_inference.py b/hidimstat/test/test_ensemble_clustered_inference.py index 39381108..0d59e221 100644 --- a/hidimstat/test/test_ensemble_clustered_inference.py +++ b/hidimstat/test/test_ensemble_clustered_inference.py @@ -135,7 +135,7 @@ def test_ensemble_clustered_inference(): n_clusters, n_bootstraps=n_bootstraps, inference_method=inference_method, - ensembling_method="medians" + ensembling_method="medians", ) ) @@ -150,8 +150,6 @@ def test_ensemble_clustered_inference(): ) - - def test_ensemble_clustered_inference_exception(): """ Test the raise of exception @@ -159,30 +157,23 @@ def test_ensemble_clustered_inference_exception(): n_samples, n_features = 100, 2000 n_clusters = 10 X, Y, beta, epsilon = multivariate_1D_simulation( - n_samples=n_samples, n_features=n_features, + 
n_samples=n_samples, + n_features=n_features, ) connectivity = image.grid_to_graph(n_x=n_features, n_y=1, n_z=1) ward = FeatureAgglomeration( n_clusters=n_clusters, connectivity=connectivity, linkage="ward" ) - + # Test the raise of exception with pytest.raises(ValueError, match="Unknown ensembling method."): ensemble_clustered_inference( - X, - Y, - ward, - n_clusters, - ensembling_method="wrong_method" + X, Y, ward, n_clusters, ensembling_method="wrong_method" ) - - - with pytest.raises(ValueError, match="'memory' must be None or a string corresponding " - + "to the path of the caching directory."): - ensemble_clustered_inference( - X, - Y, - ward, - n_clusters, - memory=[] - ) \ No newline at end of file + + with pytest.raises( + ValueError, + match="'memory' must be None or a string corresponding " + + "to the path of the caching directory.", + ): + ensemble_clustered_inference(X, Y, ward, n_clusters, memory=[]) diff --git a/hidimstat/test/test_generate_knockoff.py b/hidimstat/test/test_generate_knockoff.py index 44430ec7..ef858f67 100644 --- a/hidimstat/test/test_generate_knockoff.py +++ b/hidimstat/test/test_generate_knockoff.py @@ -9,7 +9,6 @@ import pytest - def test_estimate_distribution_wolf(): SEED = 42 n = 100 @@ -31,12 +30,15 @@ def test_estimate_distribution_lasso(): assert mu.size == p assert Sigma.shape == (p, p) + def test_gaussian_knockoff_estimate_exception(): SEED = 42 n = 100 p = 50 X, y, _, non_zero = simu_data(n, p, seed=SEED) - with pytest.raises(ValueError, match="test is not a valid covariance estimated method"): + with pytest.raises( + ValueError, match="test is not a valid covariance estimated method" + ): _estimate_distribution(X, shrink=True, cov_estimator="test") @@ -59,6 +61,7 @@ def test_gaussian_knockoff_exception(): X, y, _, non_zero = simu_data(n, p, seed=SEED) mu, Sigma = _estimate_distribution(X, shrink=True, cov_estimator="ledoit_wolf") - with pytest.raises(ValueError, match="test is not a valid knockoff contriction method"): + with pytest.raises( + ValueError, match="test is not a valid knockoff contriction method" + ): gaussian_knockoff_generation(X, mu, Sigma, method="test", seed=SEED * 2) - diff --git a/hidimstat/test/test_knockoff_aggregation.py b/hidimstat/test/test_knockoff_aggregation.py index 992ef6dd..67d5628c 100644 --- a/hidimstat/test/test_knockoff_aggregation.py +++ b/hidimstat/test/test_knockoff_aggregation.py @@ -54,8 +54,10 @@ def test_knockoff_aggregation(): assert fdp_verbose < 0.5 assert power_verbose > 0.1 - selected_ko, test_scored, thres, X_tilde = model_x_knockoff(X, y, fdr=fdr, seed=5, verbose=True) - #TODO add tests for the 3 other variables + selected_ko, test_scored, thres, X_tilde = model_x_knockoff( + X, y, fdr=fdr, seed=5, verbose=True + ) + # TODO add tests for the 3 other variables np.testing.assert_array_equal(selected_no_verbose, selected_ko) @@ -67,8 +69,6 @@ def test_knockoff_aggregation(): assert fdp_verbose < 0.5 assert power_verbose > 0.1 - - # Using e-values aggregation (verbose vs no verbose) selected_verbose, aggregated_pval, pvals = knockoff_aggregation( diff --git a/hidimstat/test/test_loco.py b/hidimstat/test/test_loco.py index 67927513..a6eeaf00 100644 --- a/hidimstat/test/test_loco.py +++ b/hidimstat/test/test_loco.py @@ -1,7 +1,6 @@ import numpy as np import pandas as pd import pytest -from sklearn.base import clone from sklearn.exceptions import NotFittedError from sklearn.linear_model import LinearRegression, LogisticRegression from sklearn.metrics import log_loss diff --git 
a/hidimstat/test/test_model_x_knockoff.py b/hidimstat/test/test_model_x_knockoff.py index 6caa3902..fe56f887 100644 --- a/hidimstat/test/test_model_x_knockoff.py +++ b/hidimstat/test/test_model_x_knockoff.py @@ -22,12 +22,12 @@ def test_model_x_knockoff_with_verbose(): n = 300 p = 300 X, y, _, non_zero = simu_data(n, p, seed=seed) - ko_result, test_scored, thres, X_tilde = model_x_knockoff(X, y, fdr=fdr, seed=5, verbose=True) - #TODO add tests for the 3 other variables + ko_result, test_scored, thres, X_tilde = model_x_knockoff( + X, y, fdr=fdr, seed=5, verbose=True + ) + # TODO add tests for the 3 other variables fdp, power = cal_fdp_power(ko_result, non_zero) assert fdp <= 0.2 assert power > 0.7 - - diff --git a/hidimstat/test/test_noise_std.py b/hidimstat/test/test_noise_std.py index a5cd94b7..6766adea 100644 --- a/hidimstat/test/test_noise_std.py +++ b/hidimstat/test/test_noise_std.py @@ -143,13 +143,17 @@ def test_group_reid_exception(): n_times=n_times, ) # max_iter=1 to get a better coverage - with pytest.raises(ValueError, match= "The requested AR order is to high with " - + "respect to the number of time steps." - ): - group_reid(X, Y, stationary=True, method="AR", order = 100) - - with pytest.raises(ValueError, match= "Unknown method for estimating the covariance matrix"): - group_reid(X, Y, method="test", order = 100) + with pytest.raises( + ValueError, + match="The requested AR order is to high with " + + "respect to the number of time steps.", + ): + group_reid(X, Y, stationary=True, method="AR", order=100) + + with pytest.raises( + ValueError, match="Unknown method for estimating the covariance matrix" + ): + group_reid(X, Y, method="test", order=100) def test_empirical_snr(): diff --git a/hidimstat/test/test_permutation_test.py b/hidimstat/test/test_permutation_test.py index c50162dc..dc94fecf 100644 --- a/hidimstat/test/test_permutation_test.py +++ b/hidimstat/test/test_permutation_test.py @@ -39,6 +39,7 @@ def test_permutation_test(): assert_almost_equal(pval_corr, expected, decimal=1) + def test_permutation_test_with_SVR(): """Testing the procedure on a simulation with no structure and a support of size 1. 
Computing one-sided p-values, we want a low p-value @@ -62,9 +63,11 @@ def test_permutation_test_with_SVR(): y = y - np.mean(y) X_init = X_init - np.mean(X_init, axis=0) - pval_corr, one_minus_pval_corr = permutation_test_cv(X_init, y, n_permutations=100, C=0.1) + pval_corr, one_minus_pval_corr = permutation_test_cv( + X_init, y, n_permutations=100, C=0.1 + ) expected = 0.5 * np.ones(n_features) expected[:support_size] = 0.0 - assert_almost_equal(pval_corr, expected, decimal=1) \ No newline at end of file + assert_almost_equal(pval_corr, expected, decimal=1) diff --git a/hidimstat/test/test_scenario.py b/hidimstat/test/test_scenario.py index 4441ffc3..e6d706a6 100644 --- a/hidimstat/test/test_scenario.py +++ b/hidimstat/test/test_scenario.py @@ -56,9 +56,11 @@ def test_multivariate_1D_simulation(): X, y, beta, noise = multivariate_1D_simulation() rho_hat = np.corrcoef(X[:, 19], X[:, 20])[0, 1] assert_almost_equal(rho_hat, 0, decimal=1) - + # Test 3 - X, y, beta, noise = multivariate_1D_simulation(n_samples=9, nb_group=2, group_size=3) + X, y, beta, noise = multivariate_1D_simulation( + n_samples=9, nb_group=2, group_size=3 + ) corr_X = np.corrcoef(X) assert_almost_equal(corr_X[0, 0], 1, decimal=1) assert_almost_equal(corr_X[0, 1], 1, decimal=1) @@ -78,18 +80,22 @@ def test_multivariate_1D_simulation(): assert_almost_equal(corr_X[5, 3], 1, decimal=1) assert_almost_equal(corr_X[5, 4], 1, decimal=1) assert_almost_equal(corr_X[5, 5], 1, decimal=1) - - + def test_multivariate_1D_simulation_exception(): """ Test when the input paramters is not correct. """ - with pytest.raises(ValueError, match="The number of groups and their size must be positive."): + with pytest.raises( + ValueError, match="The number of groups and their size must be positive." + ): multivariate_1D_simulation(nb_group=-1) - - with pytest.raises(ValueError, match="The number of samples is too small compate to the number " - "of group and their size to gerate the data."): + + with pytest.raises( + ValueError, + match="The number of samples is too small compate to the number " + "of group and their size to gerate the data.", + ): multivariate_1D_simulation(n_samples=10, nb_group=2, group_size=6) @@ -198,5 +204,3 @@ def test_multivariate_temporal_simulation(): rho_data_hat = np.corrcoef(X[:, 19], X[:, 20])[0, 1] assert_almost_equal(rho_data_hat, rho_data, decimal=1) assert_equal(Y, np.dot(X, beta) + noise) - - diff --git a/hidimstat/test/test_utils.py b/hidimstat/test/test_utils.py index 24bbf74e..f6efe8b0 100644 --- a/hidimstat/test/test_utils.py +++ b/hidimstat/test/test_utils.py @@ -15,22 +15,32 @@ def test_quantile_aggregation(): gamma_min = 0.05 assert_array_almost_equal(0.1 * quantile_aggregation(p_values, 0.1), [0.01] * 10) - assert_array_almost_equal(0.1 * quantile_aggregation(p_values, 0.1, adaptive=True, gamma_min=gamma_min) - / (1 - np.log(gamma_min)), [0.01] * 10) + assert_array_almost_equal( + 0.1 + * quantile_aggregation(p_values, 0.1, adaptive=True, gamma_min=gamma_min) + / (1 - np.log(gamma_min)), + [0.01] * 10, + ) assert_array_almost_equal(0.3 * quantile_aggregation(p_values, 0.3), [0.03] * 10) - assert_array_almost_equal(0.3 * quantile_aggregation(p_values, 0.3, adaptive=True, gamma_min=gamma_min) - / (1 - np.log(gamma_min)), [0.03] * 10) + assert_array_almost_equal( + 0.3 + * quantile_aggregation(p_values, 0.3, adaptive=True, gamma_min=gamma_min) + / (1 - np.log(gamma_min)), + [0.03] * 10, + ) assert_array_almost_equal(0.5 * quantile_aggregation(p_values, 0.5), [0.05] * 10) - assert_array_almost_equal(0.5 * 
quantile_aggregation(p_values, 0.5, adaptive=True, gamma_min=gamma_min) - / (1 - np.log(gamma_min)), [0.05] * 10) + assert_array_almost_equal( + 0.5 + * quantile_aggregation(p_values, 0.5, adaptive=True, gamma_min=gamma_min) + / (1 - np.log(gamma_min)), + [0.05] * 10, + ) # One p-value within the quantile aggregation method p_values = np.array([0.0]) assert quantile_aggregation(p_values) == 0.0 assert quantile_aggregation(p_values, adaptive=True) == 0.0 - - def test_fdr_threshold(): @@ -84,7 +94,7 @@ def test_cal_fdp_power(): assert power == 0 # test for shift in index - fdp, power = cal_fdp_power(selected+1, non_zero_index, r_index=True) - + fdp, power = cal_fdp_power(selected + 1, non_zero_index, r_index=True) + assert fdp == 2 / len(selected) - assert power == 18 / len(non_zero_index) \ No newline at end of file + assert power == 18 / len(non_zero_index) diff --git a/hidimstat/utils.py b/hidimstat/utils.py index 418ee85a..0c1abc44 100644 --- a/hidimstat/utils.py +++ b/hidimstat/utils.py @@ -147,7 +147,7 @@ def _fixed_quantile_aggregation(pvals, gamma=0.5): def _adaptive_quantile_aggregation(pvals, gamma_min=0.05): """adaptive version of the quantile aggregation method, Meinshausen et al. (2008)""" - gammas = np.linspace(gamma_min, 1., 30) + gammas = np.linspace(gamma_min, 1.0, 30) list_Q = np.array([_fixed_quantile_aggregation(pvals, gamma) for gamma in gammas]) return np.minimum(1, (1 - np.log(gamma_min)) * list_Q.min(0)) From a830f047cdd45a21f7a3858b123845ebea9a3736 Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Wed, 18 Dec 2024 15:35:25 +0100 Subject: [PATCH 12/26] Fix syntax --- hidimstat/scenario.py | 12 ++++---- hidimstat/test/test_generate_knockoff.py | 36 ++++++++++++------------ 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/hidimstat/scenario.py b/hidimstat/scenario.py index 88130c9b..0286b560 100644 --- a/hidimstat/scenario.py +++ b/hidimstat/scenario.py @@ -16,7 +16,7 @@ def multivariate_1D_simulation( rho=0.0, shuffle=True, seed=0, - nb_group=0, + n_groups=0, group_size=0, ): """Generate 1D data with Toeplitz design matrix @@ -44,7 +44,7 @@ def multivariate_1D_simulation( seed : int Seed used for generating design matrix and noise. - nb_group : int + n_groups : int Number of groups. 
group_size : int @@ -67,9 +67,9 @@ def multivariate_1D_simulation( """ rng = np.random.default_rng(seed) - if nb_group < 0 or group_size < 0: + if n_groups < 0 or group_size < 0: raise ValueError("The number of groups and their size must be positive.") - n_generate_samples = n_samples - nb_group * group_size + n_generate_samples = n_samples - n_groups * group_size if n_generate_samples <= 0: raise ValueError( "The number of samples is too small compate to the number " @@ -85,8 +85,8 @@ def multivariate_1D_simulation( if shuffle: rng.shuffle(X.T) - if nb_group > 0: - groups = np.repeat(X[:nb_group], group_size, axis=0) + if n_groups > 0: + groups = np.repeat(X[:n_groups], group_size, axis=0) X = np.vstack((groups, X)) beta = np.zeros(n_features) diff --git a/hidimstat/test/test_generate_knockoff.py b/hidimstat/test/test_generate_knockoff.py index ef858f67..231e8486 100644 --- a/hidimstat/test/test_generate_knockoff.py +++ b/hidimstat/test/test_generate_knockoff.py @@ -10,32 +10,32 @@ def test_estimate_distribution_wolf(): - SEED = 42 + seed = 42 n = 100 p = 50 - X, y, _, non_zero = simu_data(n, p, seed=SEED) - mu, Sigma = _estimate_distribution(X, cov_estimator="ledoit_wolf") + X, _, _, _ = simu_data(n, p, seed=seed) + mu, sigma = _estimate_distribution(X, cov_estimator="ledoit_wolf") assert mu.size == p - assert Sigma.shape == (p, p) + assert sigma.shape == (p, p) def test_estimate_distribution_lasso(): - SEED = 42 + seed = 42 n = 100 p = 50 - X, y, _, non_zero = simu_data(n, p, seed=SEED) - mu, Sigma = _estimate_distribution(X, shrink=True, cov_estimator="graph_lasso") + X, _, _, _ = simu_data(n, p, seed=seed) + mu, sigma = _estimate_distribution(X, shrink=True, cov_estimator="graph_lasso") assert mu.size == p - assert Sigma.shape == (p, p) + assert sigma.shape == (p, p) def test_gaussian_knockoff_estimate_exception(): - SEED = 42 + seed = 42 n = 100 p = 50 - X, y, _, non_zero = simu_data(n, p, seed=SEED) + X, _, _, _ = simu_data(n, p, seed=seed) with pytest.raises( ValueError, match="test is not a valid covariance estimated method" ): @@ -43,25 +43,25 @@ def test_gaussian_knockoff_estimate_exception(): def test_gaussian_knockoff_equi(): - SEED = 42 + seed = 42 n = 100 p = 50 - X, y, _, non_zero = simu_data(n, p, seed=SEED) - mu, Sigma = _estimate_distribution(X, shrink=True, cov_estimator="ledoit_wolf") + X, _, _, _ = simu_data(n, p, seed=seed) + mu, sigma = _estimate_distribution(X, shrink=True, cov_estimator="ledoit_wolf") - X_tilde = gaussian_knockoff_generation(X, mu, Sigma, method="equi", seed=SEED * 2) + X_tilde = gaussian_knockoff_generation(X, mu, sigma, method="equi", seed=seed * 2) assert X_tilde.shape == (n, p) def test_gaussian_knockoff_exception(): - SEED = 42 + seed = 42 n = 100 p = 50 - X, y, _, non_zero = simu_data(n, p, seed=SEED) - mu, Sigma = _estimate_distribution(X, shrink=True, cov_estimator="ledoit_wolf") + X, _, _, _ = simu_data(n, p, seed=seed) + mu, sigma = _estimate_distribution(X, shrink=True, cov_estimator="ledoit_wolf") with pytest.raises( ValueError, match="test is not a valid knockoff contriction method" ): - gaussian_knockoff_generation(X, mu, Sigma, method="test", seed=SEED * 2) + gaussian_knockoff_generation(X, mu, sigma, method="test", seed=seed * 2) From 63e6c540bbac6735f4d3dd75301fa2b63f2d03d4 Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Wed, 18 Dec 2024 15:43:56 +0100 Subject: [PATCH 13/26] Fix bugs by changing name of groups --- hidimstat/test/test_clustered_inference.py | 6 +++--- hidimstat/test/test_scenario.py | 6 +++--- 2 files changed, 6 
insertions(+), 6 deletions(-) diff --git a/hidimstat/test/test_clustered_inference.py b/hidimstat/test/test_clustered_inference.py index 490b5365..4a726367 100644 --- a/hidimstat/test/test_clustered_inference.py +++ b/hidimstat/test/test_clustered_inference.py @@ -131,7 +131,7 @@ def test_clustered_inference_no_temporal_groups(): n_samples, n_features = 100, 2000 support_size = 10 - nb_groups = 9 + n_groupss = 9 size = 10 sigma = 5.0 rho = 0.95 @@ -148,10 +148,10 @@ def test_clustered_inference_no_temporal_groups(): rho=rho, shuffle=False, seed=2, - nb_group=nb_groups, + n_groups=n_groupss, group_size=size, ) - groups = np.concatenate([[i] * size for i in range(nb_groups + 1)]) + groups = np.concatenate([[i] * size for i in range(n_groupss + 1)]) y = y - np.mean(y) X_init = X_init - np.mean(X_init, axis=0) diff --git a/hidimstat/test/test_scenario.py b/hidimstat/test/test_scenario.py index e6d706a6..c17794ad 100644 --- a/hidimstat/test/test_scenario.py +++ b/hidimstat/test/test_scenario.py @@ -59,7 +59,7 @@ def test_multivariate_1D_simulation(): # Test 3 X, y, beta, noise = multivariate_1D_simulation( - n_samples=9, nb_group=2, group_size=3 + n_samples=9, n_groups=2, group_size=3 ) corr_X = np.corrcoef(X) assert_almost_equal(corr_X[0, 0], 1, decimal=1) @@ -89,14 +89,14 @@ def test_multivariate_1D_simulation_exception(): with pytest.raises( ValueError, match="The number of groups and their size must be positive." ): - multivariate_1D_simulation(nb_group=-1) + multivariate_1D_simulation(n_groups=-1) with pytest.raises( ValueError, match="The number of samples is too small compate to the number " "of group and their size to gerate the data.", ): - multivariate_1D_simulation(n_samples=10, nb_group=2, group_size=6) + multivariate_1D_simulation(n_samples=10, n_groups=2, group_size=6) def test_multivariate_simulation(): From 3dfc30ee4066b594b3be7662646204c85816f228 Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Thu, 19 Dec 2024 10:37:43 +0100 Subject: [PATCH 14/26] Modified scenario with group --- hidimstat/scenario.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/hidimstat/scenario.py b/hidimstat/scenario.py index 0286b560..1defd92d 100644 --- a/hidimstat/scenario.py +++ b/hidimstat/scenario.py @@ -67,17 +67,22 @@ def multivariate_1D_simulation( """ rng = np.random.default_rng(seed) + if n_groups < 0 or group_size < 0: raise ValueError("The number of groups and their size must be positive.") - n_generate_samples = n_samples - n_groups * group_size - if n_generate_samples <= 0: + + n_individual_samples = n_samples - n_groups * group_size + if n_individual_samples <= 0: raise ValueError( "The number of samples is too small compate to the number " "of group and their size to gerate the data." 
) + + n_generate_samples = n_groups + n_individual_samples + + # generate random data for each samples X = np.zeros((n_generate_samples, n_features)) X[:, 0] = rng.standard_normal(n_generate_samples) - for i in np.arange(1, n_features): rand_vector = ((1 - rho**2) ** 0.5) * rng.standard_normal(n_generate_samples) X[:, i] = rho * X[:, i - 1] + rand_vector @@ -85,13 +90,16 @@ def multivariate_1D_simulation( if shuffle: rng.shuffle(X.T) + # generate data for the groups based on one sample if n_groups > 0: groups = np.repeat(X[:n_groups], group_size, axis=0) - X = np.vstack((groups, X)) + X = np.vstack((groups, X[n_groups:])) + # generate the vector of variable of importances beta = np.zeros(n_features) beta[0:support_size] = 1.0 + # generate the simulated regression data noise = sigma * rng.standard_normal(n_samples) y = np.dot(X, beta) + noise From 369ccdb7282cb4851a4c1d5dcfde6f68df39188c Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Thu, 19 Dec 2024 10:39:23 +0100 Subject: [PATCH 15/26] Fix typo --- hidimstat/scenario.py | 10 +++++----- hidimstat/test/test_clustered_inference.py | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/hidimstat/scenario.py b/hidimstat/scenario.py index 1defd92d..60f15d77 100644 --- a/hidimstat/scenario.py +++ b/hidimstat/scenario.py @@ -67,19 +67,19 @@ def multivariate_1D_simulation( """ rng = np.random.default_rng(seed) - + if n_groups < 0 or group_size < 0: raise ValueError("The number of groups and their size must be positive.") - + n_individual_samples = n_samples - n_groups * group_size if n_individual_samples <= 0: raise ValueError( "The number of samples is too small compate to the number " "of group and their size to gerate the data." ) - + n_generate_samples = n_groups + n_individual_samples - + # generate random data for each samples X = np.zeros((n_generate_samples, n_features)) X[:, 0] = rng.standard_normal(n_generate_samples) @@ -90,7 +90,7 @@ def multivariate_1D_simulation( if shuffle: rng.shuffle(X.T) - # generate data for the groups based on one sample + # generate data for the groups based on one sample if n_groups > 0: groups = np.repeat(X[:n_groups], group_size, axis=0) X = np.vstack((groups, X[n_groups:])) diff --git a/hidimstat/test/test_clustered_inference.py b/hidimstat/test/test_clustered_inference.py index 4a726367..12149597 100644 --- a/hidimstat/test/test_clustered_inference.py +++ b/hidimstat/test/test_clustered_inference.py @@ -131,7 +131,7 @@ def test_clustered_inference_no_temporal_groups(): n_samples, n_features = 100, 2000 support_size = 10 - n_groupss = 9 + n_groups = 9 size = 10 sigma = 5.0 rho = 0.95 @@ -148,10 +148,10 @@ def test_clustered_inference_no_temporal_groups(): rho=rho, shuffle=False, seed=2, - n_groups=n_groupss, + n_groups=n_groups, group_size=size, ) - groups = np.concatenate([[i] * size for i in range(n_groupss + 1)]) + groups = np.concatenate([[i] * size for i in range(n_groups + 1)]) y = y - np.mean(y) X_init = X_init - np.mean(X_init, axis=0) From 7e03524e781f03a523d72e352bd332d509606292 Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Mon, 6 Jan 2025 18:10:06 +0100 Subject: [PATCH 16/26] don't use global variable --- hidimstat/test/test_knockoff_aggregation.py | 14 +++++++------- hidimstat/test/test_model_x_knockoff.py | 8 ++++---- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/hidimstat/test/test_knockoff_aggregation.py b/hidimstat/test/test_knockoff_aggregation.py index 67d5628c..49fa7798 100644 --- a/hidimstat/test/test_knockoff_aggregation.py +++ 
b/hidimstat/test/test_knockoff_aggregation.py @@ -4,15 +4,15 @@ import numpy as np import pytest -n = 500 -p = 100 -snr = 5 -n_bootstraps = 25 -fdr = 0.5 -X, y, _, non_zero_index = simu_data(n, p, snr=snr, seed=0) - def test_knockoff_aggregation(): + n = 500 + p = 100 + snr = 5 + n_bootstraps = 25 + fdr = 0.5 + X, y, _, non_zero_index = simu_data(n, p, snr=snr, seed=0) + selected_verbose, aggregated_pval, pvals = knockoff_aggregation( X, y, fdr=fdr, n_bootstraps=n_bootstraps, verbose=True, random_state=0 ) diff --git a/hidimstat/test/test_model_x_knockoff.py b/hidimstat/test/test_model_x_knockoff.py index fe56f887..c4cb1b7e 100644 --- a/hidimstat/test/test_model_x_knockoff.py +++ b/hidimstat/test/test_model_x_knockoff.py @@ -2,12 +2,10 @@ from hidimstat.data_simulation import simu_data from hidimstat.utils import cal_fdp_power -seed = 42 -fdr = 0.2 - def test_model_x_knockoff(): - + seed = 42 + fdr = 0.2 n = 300 p = 300 X, y, _, non_zero = simu_data(n, p, seed=seed) @@ -19,6 +17,8 @@ def test_model_x_knockoff(): def test_model_x_knockoff_with_verbose(): + seed = 42 + fdr = 0.2 n = 300 p = 300 X, y, _, non_zero = simu_data(n, p, seed=seed) From 794310efd5c7f1c47384ae79175c75a03bcfcca9 Mon Sep 17 00:00:00 2001 From: lionel kusch Date: Tue, 7 Jan 2025 15:05:38 +0100 Subject: [PATCH 17/26] Apply suggestions from code review improvement Co-authored-by: bthirion Co-authored-by: Joseph Paillard --- src/hidimstat/scenario.py | 4 ++-- test/test_scenario.py | 23 +++++------------------ 2 files changed, 7 insertions(+), 20 deletions(-) diff --git a/src/hidimstat/scenario.py b/src/hidimstat/scenario.py index f147e5ad..c07dfdbf 100644 --- a/src/hidimstat/scenario.py +++ b/src/hidimstat/scenario.py @@ -54,7 +54,7 @@ def multivariate_1D_simulation( ------- X : ndarray, shape (n_samples, n_features) Design matrix. - if their is some groups, the first rows contains the groups values. + if there are some groups, the first rows contains the group index y : ndarray, shape (n_samples,) Target. @@ -74,7 +74,7 @@ def multivariate_1D_simulation( n_individual_samples = n_samples - n_groups * group_size if n_individual_samples <= 0: raise ValueError( - "The number of samples is too small compate to the number " + "The number of samples is too small compared to the number " "of group and their size to gerate the data." 
) diff --git a/test/test_scenario.py b/test/test_scenario.py index c17794ad..08be5e06 100644 --- a/test/test_scenario.py +++ b/test/test_scenario.py @@ -62,24 +62,11 @@ def test_multivariate_1D_simulation(): n_samples=9, n_groups=2, group_size=3 ) corr_X = np.corrcoef(X) - assert_almost_equal(corr_X[0, 0], 1, decimal=1) - assert_almost_equal(corr_X[0, 1], 1, decimal=1) - assert_almost_equal(corr_X[0, 2], 1, decimal=1) - assert_almost_equal(corr_X[1, 0], 1, decimal=1) - assert_almost_equal(corr_X[1, 1], 1, decimal=1) - assert_almost_equal(corr_X[1, 2], 1, decimal=1) - assert_almost_equal(corr_X[2, 0], 1, decimal=1) - assert_almost_equal(corr_X[2, 1], 1, decimal=1) - assert_almost_equal(corr_X[2, 2], 1, decimal=1) - assert_almost_equal(corr_X[3, 3], 1, decimal=1) - assert_almost_equal(corr_X[3, 4], 1, decimal=1) - assert_almost_equal(corr_X[3, 5], 1, decimal=1) - assert_almost_equal(corr_X[4, 3], 1, decimal=1) - assert_almost_equal(corr_X[4, 4], 1, decimal=1) - assert_almost_equal(corr_X[4, 5], 1, decimal=1) - assert_almost_equal(corr_X[5, 3], 1, decimal=1) - assert_almost_equal(corr_X[5, 4], 1, decimal=1) - assert_almost_equal(corr_X[5, 5], 1, decimal=1) + from itertools import product + groups = [[0, 1, 2], [3, 4, 5]] + for g in groups: + for i, j in product(g, g): + assert_almost_equal(corr_X[i, j], 1, decimal=1) def test_multivariate_1D_simulation_exception(): From 9c69a8fffec31cb201265c740ecdafd3fb90e8c7 Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Tue, 7 Jan 2025 10:20:55 +0100 Subject: [PATCH 18/26] Change pvalue to evalue --- src/hidimstat/utils.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/hidimstat/utils.py b/src/hidimstat/utils.py index 0c1abc44..daf4e276 100644 --- a/src/hidimstat/utils.py +++ b/src/hidimstat/utils.py @@ -84,17 +84,17 @@ def _bhq_threshold(pvals, fdr=0.1): return -1.0 -def _ebh_threshold(pvals, fdr=0.1): +def _ebh_threshold(evals, fdr=0.1): """e-BH procedure for FDR control (see Wang and Ramdas 2021)""" - n_features = len(pvals) - pvals_sorted = -np.sort(-pvals) # sort in descending order + n_features = len(evals) + evals_sorted = -np.sort(-evals) # sort in descending order selected_index = 2 * n_features for i in range(n_features - 1, -1, -1): - if pvals_sorted[i] >= n_features / (fdr * (i + 1)): + if evals_sorted[i] >= n_features / (fdr * (i + 1)): selected_index = i break if selected_index <= n_features: - return pvals_sorted[selected_index] + return evals_sorted[selected_index] else: return np.inf From 12edb2342c42c97ec10691a94cc43a4978a176cb Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Tue, 7 Jan 2025 15:15:55 +0100 Subject: [PATCH 19/26] Fix bugs and typo --- test/test_scenario.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test_scenario.py b/test/test_scenario.py index 08be5e06..11a35c21 100644 --- a/test/test_scenario.py +++ b/test/test_scenario.py @@ -63,6 +63,7 @@ def test_multivariate_1D_simulation(): ) corr_X = np.corrcoef(X) from itertools import product + groups = [[0, 1, 2], [3, 4, 5]] for g in groups: for i, j in product(g, g): @@ -80,7 +81,7 @@ def test_multivariate_1D_simulation_exception(): with pytest.raises( ValueError, - match="The number of samples is too small compate to the number " + match="The number of samples is too small compared to the number " "of group and their size to gerate the data.", ): multivariate_1D_simulation(n_samples=10, n_groups=2, group_size=6) From e06122f2cce53c5074bd4f3a5a9d4e3f5af7e2d1 Mon Sep 17 00:00:00 2001 From: lionel kusch Date: 
Fri, 17 Jan 2025 10:34:10 +0100 Subject: [PATCH 20/26] Apply suggestions from code review change position of import Co-authored-by: Joseph Paillard --- test/test_scenario.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/test_scenario.py b/test/test_scenario.py index 11a35c21..12b3533d 100644 --- a/test/test_scenario.py +++ b/test/test_scenario.py @@ -1,7 +1,7 @@ """ Test the scenario module """ - +from itertools import product import numpy as np import pytest from numpy.testing import assert_almost_equal, assert_equal @@ -62,7 +62,6 @@ def test_multivariate_1D_simulation(): n_samples=9, n_groups=2, group_size=3 ) corr_X = np.corrcoef(X) - from itertools import product groups = [[0, 1, 2], [3, 4, 5]] for g in groups: From cee3f9afbcf4c348037511aec2cec8b78abcad97 Mon Sep 17 00:00:00 2001 From: lionel kusch Date: Fri, 17 Jan 2025 10:37:41 +0100 Subject: [PATCH 21/26] Update test_scenario.py Formatting the file --- test/test_scenario.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_scenario.py b/test/test_scenario.py index 12b3533d..28d27618 100644 --- a/test/test_scenario.py +++ b/test/test_scenario.py @@ -1,6 +1,7 @@ """ Test the scenario module """ + from itertools import product import numpy as np import pytest From 0937e9cb719614519320fc7abb5bee00ffacde4c Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Fri, 17 Jan 2025 18:49:48 +0100 Subject: [PATCH 22/26] Reverse the option group --- src/hidimstat/scenario.py | 31 +++------------------- test/test_clustered_inference.py | 45 ++++++++++++++++++-------------- test/test_scenario.py | 28 -------------------- 3 files changed, 29 insertions(+), 75 deletions(-) diff --git a/src/hidimstat/scenario.py b/src/hidimstat/scenario.py index c07dfdbf..1bc18be0 100644 --- a/src/hidimstat/scenario.py +++ b/src/hidimstat/scenario.py @@ -16,8 +16,6 @@ def multivariate_1D_simulation( rho=0.0, shuffle=True, seed=0, - n_groups=0, - group_size=0, ): """Generate 1D data with Toeplitz design matrix @@ -44,17 +42,10 @@ def multivariate_1D_simulation( seed : int Seed used for generating design matrix and noise. - n_groups : int - Number of groups. - - group_size : int - Size of each group. - Returns ------- X : ndarray, shape (n_samples, n_features) Design matrix. - if there are some groups, the first rows contains the group index y : ndarray, shape (n_samples,) Target. @@ -68,33 +59,17 @@ def multivariate_1D_simulation( rng = np.random.default_rng(seed) - if n_groups < 0 or group_size < 0: - raise ValueError("The number of groups and their size must be positive.") - - n_individual_samples = n_samples - n_groups * group_size - if n_individual_samples <= 0: - raise ValueError( - "The number of samples is too small compared to the number " - "of group and their size to gerate the data." 
- ) - - n_generate_samples = n_groups + n_individual_samples # generate random data for each samples - X = np.zeros((n_generate_samples, n_features)) - X[:, 0] = rng.standard_normal(n_generate_samples) + X = np.zeros((n_samples, n_features)) + X[:, 0] = rng.standard_normal(n_samples) for i in np.arange(1, n_features): - rand_vector = ((1 - rho**2) ** 0.5) * rng.standard_normal(n_generate_samples) + rand_vector = ((1 - rho**2) ** 0.5) * rng.standard_normal(n_samples) X[:, i] = rho * X[:, i - 1] + rand_vector if shuffle: rng.shuffle(X.T) - # generate data for the groups based on one sample - if n_groups > 0: - groups = np.repeat(X[:n_groups], group_size, axis=0) - X = np.vstack((groups, X[n_groups:])) - # generate the vector of variable of importances beta = np.zeros(n_features) beta[0:support_size] = 1.0 diff --git a/test/test_clustered_inference.py b/test/test_clustered_inference.py index 12149597..221647e2 100644 --- a/test/test_clustered_inference.py +++ b/test/test_clustered_inference.py @@ -14,6 +14,8 @@ multivariate_temporal_simulation, ) +from sklearn.datasets import make_multilabel_classification + # Scenario 1: data with no temporal dimension def test_clustered_inference_no_temporal(): @@ -129,10 +131,9 @@ def test_clustered_inference_no_temporal_groups(): the support and p-values close to 0.5 for the others. """ - n_samples, n_features = 100, 2000 + n_samples, n_features = 20, 2000 support_size = 10 - n_groups = 9 - size = 10 + n_groups = 10 sigma = 5.0 rho = 0.95 n_clusters = 200 @@ -140,21 +141,27 @@ def test_clustered_inference_no_temporal_groups(): interior_support = support_size - margin_size extended_support = support_size + margin_size - X_init, y, beta, epsilon = multivariate_1D_simulation( - n_samples=n_samples, - n_features=n_features, - support_size=support_size, - sigma=sigma, - rho=rho, - shuffle=False, - seed=2, - n_groups=n_groups, - group_size=size, - ) - groups = np.concatenate([[i] * size for i in range(n_groups + 1)]) - - y = y - np.mean(y) - X_init = X_init - np.mean(X_init, axis=0) + # create n_group of samples + X_ = [] + y_ = [] + for i in range(n_groups): + X_init, y, beta, epsilon = multivariate_1D_simulation( + n_samples=n_samples, + n_features=n_features, + support_size=support_size, + sigma=sigma, + rho=rho, + shuffle=False, + seed=2 + i, + ) + X_.append(X_init) + y_.append(y) + + y_ = np.concatenate(y_) + y_ = y_ - np.mean(y_) + X_ = np.concatenate(X_) + X_ = X_ - np.mean(X_, axis=0) + groups = np.repeat(np.arange(0, n_groups), n_samples) connectivity = image.grid_to_graph(n_x=n_features, n_y=1, n_z=1) ward = FeatureAgglomeration( @@ -162,7 +169,7 @@ def test_clustered_inference_no_temporal_groups(): ) beta_hat, pval, pval_corr, one_minus_pval, one_minus_pval_corr = ( - clustered_inference(X_init, y, ward, n_clusters, groups=groups) + clustered_inference(X_, y_, ward, n_clusters, groups=groups) ) expected = 0.5 * np.ones(n_features) diff --git a/test/test_scenario.py b/test/test_scenario.py index 28d27618..904dda48 100644 --- a/test/test_scenario.py +++ b/test/test_scenario.py @@ -58,34 +58,6 @@ def test_multivariate_1D_simulation(): rho_hat = np.corrcoef(X[:, 19], X[:, 20])[0, 1] assert_almost_equal(rho_hat, 0, decimal=1) - # Test 3 - X, y, beta, noise = multivariate_1D_simulation( - n_samples=9, n_groups=2, group_size=3 - ) - corr_X = np.corrcoef(X) - - groups = [[0, 1, 2], [3, 4, 5]] - for g in groups: - for i, j in product(g, g): - assert_almost_equal(corr_X[i, j], 1, decimal=1) - - -def test_multivariate_1D_simulation_exception(): - """ - Test 
when the input paramters is not correct. - """ - with pytest.raises( - ValueError, match="The number of groups and their size must be positive." - ): - multivariate_1D_simulation(n_groups=-1) - - with pytest.raises( - ValueError, - match="The number of samples is too small compared to the number " - "of group and their size to gerate the data.", - ): - multivariate_1D_simulation(n_samples=10, n_groups=2, group_size=6) - def test_multivariate_simulation(): """Test if the data has expected shape, if the input parameters From 852106f9c329bbe0d0d42ebdc4c5421119093795 Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Fri, 17 Jan 2025 18:53:47 +0100 Subject: [PATCH 23/26] Remove unecessesary import --- test/test_clustered_inference.py | 2 -- test/test_scenario.py | 2 -- 2 files changed, 4 deletions(-) diff --git a/test/test_clustered_inference.py b/test/test_clustered_inference.py index 221647e2..8b4d501a 100644 --- a/test/test_clustered_inference.py +++ b/test/test_clustered_inference.py @@ -14,8 +14,6 @@ multivariate_temporal_simulation, ) -from sklearn.datasets import make_multilabel_classification - # Scenario 1: data with no temporal dimension def test_clustered_inference_no_temporal(): diff --git a/test/test_scenario.py b/test/test_scenario.py index 904dda48..503f6848 100644 --- a/test/test_scenario.py +++ b/test/test_scenario.py @@ -2,9 +2,7 @@ Test the scenario module """ -from itertools import product import numpy as np -import pytest from numpy.testing import assert_almost_equal, assert_equal from hidimstat.scenario import ( From 943fe11ea0046a1297a2ceb0420944ea178b9219 Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Fri, 17 Jan 2025 18:55:38 +0100 Subject: [PATCH 24/26] Format file --- src/hidimstat/scenario.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/hidimstat/scenario.py b/src/hidimstat/scenario.py index 1bc18be0..e9bde2dc 100644 --- a/src/hidimstat/scenario.py +++ b/src/hidimstat/scenario.py @@ -59,7 +59,6 @@ def multivariate_1D_simulation( rng = np.random.default_rng(seed) - # generate random data for each samples X = np.zeros((n_samples, n_features)) X[:, 0] = rng.standard_normal(n_samples) From 85b44a357e6a546d50fe1524d00ea7f1686300e1 Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Mon, 20 Jan 2025 13:27:02 +0100 Subject: [PATCH 25/26] small difference --- src/hidimstat/utils.py | 2 +- test/test_generate_knockoff.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hidimstat/utils.py b/src/hidimstat/utils.py index daf4e276..1c67e1b2 100644 --- a/src/hidimstat/utils.py +++ b/src/hidimstat/utils.py @@ -96,7 +96,7 @@ def _ebh_threshold(evals, fdr=0.1): if selected_index <= n_features: return evals_sorted[selected_index] else: - return np.inf + return np.infty def _bhy_threshold(pvals, reshaping_function=None, fdr=0.1): diff --git a/test/test_generate_knockoff.py b/test/test_generate_knockoff.py index 231e8486..570e6a99 100644 --- a/test/test_generate_knockoff.py +++ b/test/test_generate_knockoff.py @@ -9,7 +9,7 @@ import pytest -def test_estimate_distribution_wolf(): +def test_estimate_distribution_ledoit_wolf(): seed = 42 n = 100 p = 50 From 85a6026f1c6d1e2a7d03a2388bcf39530902a189 Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Tue, 21 Jan 2025 08:58:41 +0100 Subject: [PATCH 26/26] remove file setup --- src/hidimstat/setup.py | 14 -------------- 1 file changed, 14 deletions(-) delete mode 100644 src/hidimstat/setup.py diff --git a/src/hidimstat/setup.py b/src/hidimstat/setup.py deleted file mode 100644 index 15c6389d..00000000 --- 
a/src/hidimstat/setup.py +++ /dev/null @@ -1,14 +0,0 @@ -def configuration(parent_package="", top_path=None): - from numpy.distutils.misc_util import Configuration - - config = Configuration("plotting", parent_package, top_path) - - config.add_subpackage("tests") - - return config - - -if __name__ == "__main__": - from numpy.distutils.core import setup - - setup(**configuration(top_path="").todict())
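
For readers following patch 18 (and the np.infty tweak in patch 25), the e-BH rule implemented by _ebh_threshold in src/hidimstat/utils.py can be summarised as: sort the e-values in descending order and keep the largest k whose k-th e-value is at least n_features / (fdr * k). The sketch below is a minimal re-statement of that rule, not the library code; the evals array and the fdr level are made-up illustrative values.

import numpy as np

def ebh_threshold_sketch(evals, fdr=0.1):
    # e-BH (Wang and Ramdas, 2021): with e-values sorted in descending
    # order, find the largest k such that the k-th largest e-value is at
    # least n_features / (fdr * k); every e-value reaching that threshold
    # is selected.
    n_features = len(evals)
    evals_sorted = -np.sort(-evals)  # descending order
    for k in range(n_features, 0, -1):
        if evals_sorted[k - 1] >= n_features / (fdr * k):
            return evals_sorted[k - 1]
    return np.inf  # nothing can be selected at this FDR level

evals = np.array([120.0, 45.0, 30.0, 2.0, 0.5])  # illustrative e-values
threshold = ebh_threshold_sketch(evals, fdr=0.2)
selected = np.where(evals >= threshold)[0]  # -> features 0, 1 and 2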
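
The changes to multivariate_1D_simulation in this series (patches 14, 15, 22 and 24) all revolve around the same construction: the design matrix is built column by column as an AR(1) process, each feature being rho times the previous one plus rescaled fresh noise, so adjacent features are correlated at level rho while every column keeps unit variance. A minimal sketch of that construction, with illustrative parameter values that differ from the ones used in the tests:

import numpy as np

rng = np.random.default_rng(0)
n_samples, n_features, rho = 5000, 20, 0.7  # illustrative sizes

X = np.zeros((n_samples, n_features))
X[:, 0] = rng.standard_normal(n_samples)
for i in range(1, n_features):
    # mix the previous column with fresh noise, scaled so that the new
    # column keeps variance 1 and correlation rho with its neighbour
    noise = np.sqrt(1 - rho**2) * rng.standard_normal(n_samples)
    X[:, i] = rho * X[:, i - 1] + noise

print(np.corrcoef(X[:, 0], X[:, 1])[0, 1])  # close to 0.7

With rho close to 1, neighbouring features carry almost the same signal, which is why the tests only check support recovery up to a spatial tolerance (margin_size).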
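
The clustered-inference tests reworked in patch 22 compress the 2000 features into 200 spatially contiguous clusters with Ward feature agglomeration before running inference. A minimal sketch of that dimension-reduction step on its own, using the same scikit-learn calls as the tests but smaller, made-up sizes:

import numpy as np
from sklearn.cluster import FeatureAgglomeration
from sklearn.feature_extraction import image

rng = np.random.default_rng(0)
n_samples, n_features, n_clusters = 50, 400, 40  # illustrative sizes
X = rng.standard_normal((n_samples, n_features))

# 1D connectivity so that only neighbouring features can be merged
connectivity = image.grid_to_graph(n_x=n_features, n_y=1, n_z=1)
ward = FeatureAgglomeration(
    n_clusters=n_clusters, connectivity=connectivity, linkage="ward"
)
X_reduced = ward.fit_transform(X)  # shape (n_samples, n_clusters)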