
tests: prefer cache for missing config (#2414)
move import inside ref metric

(cherry picked from commit a8c11b4)
Borda committed Mar 18, 2024
1 parent 416779e commit 0f26306
Showing 27 changed files with 107 additions and 120 deletions.
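
The recurring pattern behind "move import inside ref metric" is visible across the test files below: instead of a module-level availability flag plus a `@pytest.mark.skipif` decorator, the optional reference package is imported inside the reference function itself, and the test is skipped at call time if the import fails. A minimal sketch of that pattern (not code from this commit), using a hypothetical `some_reference_package` as a stand-in for real optional dependencies such as `lpips` or `dython`:

import pytest
from torch import Tensor


def _reference_metric(preds: Tensor, target: Tensor) -> float:
    """Compute the reference value with an optional third-party package."""
    try:
        # hypothetical optional dependency, standing in for e.g. `lpips` or `dython`
        from some_reference_package import reference_fn
    except ImportError:
        pytest.skip("test requires `some_reference_package` to be installed")
    return float(reference_fn(preds.numpy(), target.numpy()))

With this shape, the test module still imports cleanly when the package is absent, so test collection no longer depends on every optional reference implementation being installed.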
2 changes: 1 addition & 1 deletion .github/actions/pull-caches/action.yml
@@ -90,5 +90,5 @@ runs:

- name: Restored References
continue-on-error: true
run: ls -lh tests/_cache-references/
run: py-tree tests/_cache-references/ --show_hidden
shell: bash
2 changes: 1 addition & 1 deletion .github/actions/push-caches/action.yml
@@ -99,5 +99,5 @@ runs:
key: cache-references

- name: Post References
run: ls -lh tests/_cache-references/
run: py-tree tests/_cache-references/ --show_hidden
shell: bash
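
Both CI actions swap the flat `ls -lh` listing for a tree listing of the reference cache. Assuming `py-tree --show_hidden` behaves like a recursive directory walk that includes dot-files, a rough Python equivalent of what that step prints is:

from pathlib import Path

# walk tests/_cache-references/ recursively, including hidden files, and print an indented tree
root = Path("tests/_cache-references")
for path in sorted(root.rglob("*")):
    depth = len(path.relative_to(root).parts) - 1
    print("  " * depth + path.name)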
2 changes: 1 addition & 1 deletion .gitignore
@@ -40,7 +40,7 @@ pip-delete-this-directory.txt
# Unit test / coverage reports
tests/_data/
data.zip
tests/_reference-cache/
tests/_cache-references/
htmlcov/
.coverage
.coverage.*
4 changes: 0 additions & 4 deletions src/torchmetrics/utilities/imports.py
@@ -17,10 +17,8 @@
import sys

from lightning_utilities.core.imports import RequirementCache
from packaging.version import Version, parse

_PYTHON_VERSION = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}"
_PYTHON_LOWER_3_8 = parse(_PYTHON_VERSION) < Version("3.8")
_TORCH_LOWER_2_0 = RequirementCache("torch<2.0.0")
_TORCH_GREATER_EQUAL_1_11 = RequirementCache("torch>=1.11.0")
_TORCH_GREATER_EQUAL_1_12 = RequirementCache("torch>=1.12.0")
@@ -29,7 +27,6 @@
_TORCH_GREATER_EQUAL_2_1 = RequirementCache("torch>=2.1.0")
_TORCH_GREATER_EQUAL_2_2 = RequirementCache("torch>=2.2.0")

_JIWER_AVAILABLE = RequirementCache("jiwer")
_NLTK_AVAILABLE = RequirementCache("nltk")
_ROUGE_SCORE_AVAILABLE = RequirementCache("rouge_score")
_BERTSCORE_AVAILABLE = RequirementCache("bert_score")
@@ -49,7 +46,6 @@
_GAMMATONE_AVAILABLE = RequirementCache("gammatone")
_TORCHAUDIO_AVAILABLE = RequirementCache("torchaudio")
_TORCHAUDIO_GREATER_EQUAL_0_10 = RequirementCache("torchaudio>=0.10.0")
_SACREBLEU_AVAILABLE = RequirementCache("sacrebleu")
_REGEX_AVAILABLE = RequirementCache("regex")
_PYSTOI_AVAILABLE = RequirementCache("pystoi")
_FAST_BSS_EVAL_AVAILABLE = RequirementCache("fast_bss_eval")
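
For context, the three symbols deleted above (`_PYTHON_LOWER_3_8`, `_JIWER_AVAILABLE`, `_SACREBLEU_AVAILABLE`) followed the same `RequirementCache` pattern as the flags that remain, and are dropped presumably because nothing gates on them once the reference imports move inside the reference functions. An illustrative sketch (not code from this commit) of how such a flag is defined and consumed, using the `_NLTK_AVAILABLE` flag that stays in the file:

import pytest
from lightning_utilities.core.imports import RequirementCache

# the requirement string is checked against the installed distribution; the result is cached
_NLTK_AVAILABLE = RequirementCache("nltk")


@pytest.mark.skipif(not _NLTK_AVAILABLE, reason="test requires nltk")
def test_tokenize_with_nltk():
    import nltk  # safe: the skipif above only lets this run when nltk is installed

    assert nltk.__name__ == "nltk"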
2 changes: 0 additions & 2 deletions tests/unittests/classification/test_group_fairness.py
@@ -26,7 +26,6 @@
from torchmetrics import Metric
from torchmetrics.classification.group_fairness import BinaryFairness
from torchmetrics.functional.classification.group_fairness import binary_fairness
from torchmetrics.utilities.imports import _PYTHON_LOWER_3_8

from unittests import THRESHOLD
from unittests.classification._inputs import _group_cases
@@ -222,7 +221,6 @@ def run_precision_test_gpu(

@mock.patch("unittests.helpers.testers._assert_tensor", _assert_tensor)
@mock.patch("unittests.helpers.testers._assert_allclose", _assert_allclose)
@pytest.mark.skipif(_PYTHON_LOWER_3_8, reason="`TestBinaryFairness` requires `python>=3.8`.")
@pytest.mark.parametrize("inputs", _group_cases)
class TestBinaryFairness(BinaryFairnessTester):
"""Test class for `BinaryFairness` metric."""
8 changes: 4 additions & 4 deletions tests/unittests/image/test_fid.py
@@ -34,7 +34,7 @@ def test_no_train_network_missing_torch_fidelity():
NoTrainInceptionV3(name="inception-v3-compat", features_list=["2048"])


@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity")
@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="metric requires torch-fidelity")
def test_no_train():
"""Assert that metric never leaves evaluation mode."""

@@ -52,7 +52,7 @@ def forward(self, x):
assert not model.metric.inception.training, "FID metric was changed to training mode which should not happen"


@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity")
@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="metric requires torch-fidelity")
def test_fid_pickle():
"""Assert that we can initialize the metric and pickle it."""
metric = FrechetInceptionDistance()
@@ -80,7 +80,7 @@ def test_fid_raises_errors_and_warnings():
_ = FrechetInceptionDistance(feature=[1, 2])


@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity")
@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="metric requires torch-fidelity")
@pytest.mark.parametrize("feature", [64, 192, 768, 2048])
def test_fid_same_input(feature):
"""If real and fake are update on the same data the fid score should be 0."""
@@ -111,7 +111,7 @@ def __len__(self) -> int:


@pytest.mark.skipif(not torch.cuda.is_available(), reason="test is too slow without gpu")
@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity")
@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="metric requires torch-fidelity")
@pytest.mark.parametrize("equal_size", [False, True])
def test_compare_fid(tmpdir, equal_size, feature=768):
"""Check that the hole pipeline give the same result as torch-fidelity."""
8 changes: 4 additions & 4 deletions tests/unittests/image/test_inception.py
@@ -24,7 +24,7 @@
torch.manual_seed(42)


@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity")
@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="metric requires torch-fidelity")
def test_no_train():
"""Assert that metric never leaves evaluation mode."""

@@ -44,7 +44,7 @@ def forward(self, x):
), "InceptionScore metric was changed to training mode which should not happen"


@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity")
@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="metric requires torch-fidelity")
def test_is_pickle():
"""Assert that we can initialize the metric and pickle it."""
metric = InceptionScore()
@@ -79,7 +79,7 @@ def test_is_raises_errors_and_warnings():
InceptionScore(feature=[1, 2])


@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity")
@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="metric requires torch-fidelity")
def test_is_update_compute():
"""Test that inception score works as expected."""
metric = InceptionScore()
@@ -105,7 +105,7 @@ def __len__(self) -> int:


@pytest.mark.skipif(not torch.cuda.is_available(), reason="test is too slow without gpu")
@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity")
@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="metric requires torch-fidelity")
@pytest.mark.parametrize("compute_on_cpu", [True, False])
def test_compare_is(tmpdir, compute_on_cpu):
"""Check that the hole pipeline give the same result as torch-fidelity."""
10 changes: 5 additions & 5 deletions tests/unittests/image/test_kid.py
@@ -24,7 +24,7 @@
torch.manual_seed(42)


@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity")
@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="metric requires torch-fidelity")
def test_no_train():
"""Assert that metric never leaves evaluation mode."""

@@ -42,7 +42,7 @@ def forward(self, x):
assert not model.metric.inception.training, "FID metric was changed to training mode which should not happen"


@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity")
@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="metric requires torch-fidelity")
def test_kid_pickle():
"""Assert that we can initialize the metric and pickle it."""
metric = KernelInceptionDistance()
@@ -83,7 +83,7 @@ def test_kid_raises_errors_and_warnings():
m.compute()


@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity")
@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="metric requires torch-fidelity")
def test_kid_extra_parameters():
"""Test that the different input arguments raises expected errors if wrong."""
with pytest.raises(ValueError, match="Argument `subsets` expected to be integer larger than 0"):
@@ -102,7 +102,7 @@ def test_kid_extra_parameters():
KernelInceptionDistance(coef=-1)


@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity")
@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="metric requires torch-fidelity")
@pytest.mark.parametrize("feature", [64, 192, 768, 2048])
def test_kid_same_input(feature):
"""Test that the metric works."""
@@ -132,7 +132,7 @@ def __len__(self) -> int:


@pytest.mark.skipif(not torch.cuda.is_available(), reason="test is too slow without gpu")
@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity")
@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="metric requires torch-fidelity")
def test_compare_kid(tmpdir, feature=2048):
"""Check that the hole pipeline give the same result as torch-fidelity."""
from torch_fidelity import calculate_metrics
13 changes: 7 additions & 6 deletions tests/unittests/image/test_lpips.py
@@ -16,11 +16,10 @@

import pytest
import torch
from lpips import LPIPS as LPIPS_reference # noqa: N811
from torch import Tensor
from torchmetrics.functional.image.lpips import learned_perceptual_image_patch_similarity
from torchmetrics.image.lpip import LearnedPerceptualImagePatchSimilarity
from torchmetrics.utilities.imports import _LPIPS_AVAILABLE, _TORCHVISION_AVAILABLE
from torchmetrics.utilities.imports import _TORCHVISION_AVAILABLE

from unittests.helpers import seed_all
from unittests.helpers.testers import MetricTester
@@ -43,15 +42,19 @@ def _reference_lpips(
img1: Tensor, img2: Tensor, net_type: str, normalize: bool = False, reduction: str = "mean"
) -> Tensor:
"""Comparison function for tm implementation."""
ref = LPIPS_reference(net=net_type)
try:
from lpips import LPIPS
except ImportError:
pytest.skip("test requires lpips package to be installed")

ref = LPIPS(net=net_type)
res = ref(img1, img2, normalize=normalize).detach().cpu().numpy()
if reduction == "mean":
return res.mean()
return res.sum()


@pytest.mark.skipif(not _TORCHVISION_AVAILABLE, reason="test requires that torchvision is installed")
@pytest.mark.skipif(not _LPIPS_AVAILABLE, reason="test requires that lpips is installed")
class TestLPIPS(MetricTester):
"""Test class for `LearnedPerceptualImagePatchSimilarity` metric."""

@@ -109,7 +112,6 @@ def test_normalize_arg(normalize):


@pytest.mark.skipif(not _TORCHVISION_AVAILABLE, reason="test requires that torchvision is installed")
@pytest.mark.skipif(not _LPIPS_AVAILABLE, reason="test requires that lpips is installed")
def test_error_on_wrong_init():
"""Test class raises the expected errors."""
with pytest.raises(ValueError, match="Argument `net_type` must be one .*"):
@@ -120,7 +122,6 @@ def test_error_on_wrong_init():


@pytest.mark.skipif(not _TORCHVISION_AVAILABLE, reason="test requires that torchvision is installed")
@pytest.mark.skipif(not _LPIPS_AVAILABLE, reason="test requires that lpips is installed")
@pytest.mark.parametrize(
("inp1", "inp2"),
[
6 changes: 3 additions & 3 deletions tests/unittests/image/test_mifid.py
@@ -98,7 +98,7 @@ def calculate_mifid(m1, s1, features1, m2, s2, features2):
return fid_private / (distance_private_thresholded + 1e-15)


@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity")
@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="metric requires torch-fidelity")
def test_no_train():
"""Assert that metric never leaves evaluation mode."""

@@ -139,7 +139,7 @@ def test_mifid_raises_errors_and_warnings():
_ = MemorizationInformedFrechetInceptionDistance(cosine_distance_eps=1.1)


@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity")
@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="metric requires torch-fidelity")
@pytest.mark.parametrize("feature", [64, 192, 768, 2048])
def test_fid_same_input(feature):
"""If real and fake are update on the same data the fid score should be 0."""
@@ -157,7 +157,7 @@ def test_fid_same_input(feature):


@pytest.mark.skipif(not torch.cuda.is_available(), reason="test is too slow without gpu")
@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch-fidelity")
@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="metric requires torch-fidelity")
@pytest.mark.parametrize("equal_size", [False, True])
def test_compare_mifid(equal_size):
"""Check that our implementation of MIFID is correct by comparing it to the original implementation."""
8 changes: 4 additions & 4 deletions tests/unittests/image/test_perceptual_path_length.py
@@ -29,7 +29,7 @@
seed_all(42)


@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch_fidelity")
@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="metric requires torch-fidelity")
@pytest.mark.parametrize("interpolation_method", ["lerp", "slerp_any", "slerp_unit"])
def test_interpolation_methods(interpolation_method):
"""Test that interpolation method works as expected."""
@@ -41,7 +41,7 @@ def test_interpolation_methods(interpolation_method):
assert torch.allclose(res1, res2)


@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch_fidelity")
@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="metric requires torch-fidelity")
@skip_on_running_out_of_memory()
def test_sim_net():
"""Check that the similarity network is the same as the one used in torch_fidelity."""
@@ -100,7 +100,7 @@ def sample(self, num_samples):
return torch.randn(num_samples, self.z_size)


@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch_fidelity")
@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="metric requires torch-fidelity")
@pytest.mark.parametrize(
("argument", "match"),
[
@@ -174,7 +174,7 @@ def test_raises_error_on_wrong_generator(generator, errortype, match):
ppl.update(generator=generator)


@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch_fidelity")
@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="metric requires torch-fidelity")
@pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine")
@skip_on_running_out_of_memory()
def test_compare():
10 changes: 5 additions & 5 deletions tests/unittests/multimodal/test_clip_iqa.py
@@ -71,7 +71,7 @@ def _reference_clip_iqa(preds, target, reduce=False):
return res.sum() if reduce else res


@pytest.mark.skipif(not _PIQ_GREATER_EQUAL_0_8, reason="test requires piq>=0.8")
@pytest.mark.skipif(not _PIQ_GREATER_EQUAL_0_8, reason="metric requires piq>=0.8")
@pytest.mark.skipif(not _TRANSFORMERS_GREATER_EQUAL_4_10, reason="test requires transformers>=4.10")
class TestCLIPIQA(MetricTester):
"""Test clip iqa metric."""
@@ -104,7 +104,7 @@ def test_clip_iqa_functional(self, shapes):


@skip_on_connection_issues()
@pytest.mark.skipif(not _PIQ_GREATER_EQUAL_0_8, reason="test requires piq>=0.8")
@pytest.mark.skipif(not _PIQ_GREATER_EQUAL_0_8, reason="metric requires piq>=0.8")
@pytest.mark.skipif(not _TRANSFORMERS_GREATER_EQUAL_4_10, reason="test requires transformers>=4.10")
@pytest.mark.skipif(not os.path.isfile(_SAMPLE_IMAGE), reason="test image not found")
def test_for_correctness_sample_images():
@@ -121,7 +121,7 @@ def test_for_correctness_sample_images():


@skip_on_connection_issues()
@pytest.mark.skipif(not _PIQ_GREATER_EQUAL_0_8, reason="test requires piq>=0.8")
@pytest.mark.skipif(not _PIQ_GREATER_EQUAL_0_8, reason="metric requires piq>=0.8")
@pytest.mark.skipif(not _TRANSFORMERS_GREATER_EQUAL_4_10, reason="test requires transformers>=4.10")
@pytest.mark.parametrize(
"model",
@@ -148,7 +148,7 @@ def test_other_models(model):


@skip_on_connection_issues()
@pytest.mark.skipif(not _PIQ_GREATER_EQUAL_0_8, reason="test requires piq>=0.8")
@pytest.mark.skipif(not _PIQ_GREATER_EQUAL_0_8, reason="metric requires piq>=0.8")
@pytest.mark.skipif(not _TRANSFORMERS_GREATER_EQUAL_4_10, reason="test requires transformers>=4.10")
@pytest.mark.parametrize(
"prompts",
@@ -200,7 +200,7 @@ def test_prompt(prompts):


@skip_on_connection_issues()
@pytest.mark.skipif(not _PIQ_GREATER_EQUAL_0_8, reason="test requires piq>=0.8")
@pytest.mark.skipif(not _PIQ_GREATER_EQUAL_0_8, reason="metric requires piq>=0.8")
@pytest.mark.skipif(not _TRANSFORMERS_GREATER_EQUAL_4_10, reason="test requires transformers>=4.10")
def test_plot_method():
"""Test the plot method of CLIPScore separately in this file due to the skipping conditions."""
12 changes: 6 additions & 6 deletions tests/unittests/nominal/test_cramers.py
@@ -12,13 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import itertools
import operator
from functools import partial

import pytest
import torch
from dython.nominal import cramers_v as dython_cramers_v
from lightning_utilities.core.imports import compare_version
from torchmetrics.functional.nominal.cramers import cramers_v, cramers_v_matrix
from torchmetrics.nominal.cramers import CramersV

@@ -63,10 +60,15 @@ def cramers_matrix_input():


def _reference_dython_cramers_v(preds, target, bias_correction, nan_strategy, nan_replace_value):
try:
from dython.nominal import cramers_v
except ImportError:
pytest.skip("This test requires `dython` package to be installed.")

preds = preds.argmax(1) if preds.ndim == 2 else preds
target = target.argmax(1) if target.ndim == 2 else target

v = dython_cramers_v(
v = cramers_v(
preds.numpy(),
target.numpy(),
bias_correction=bias_correction,
@@ -87,7 +89,6 @@ def _dython_cramers_v_matrix(matrix, bias_correction, nan_strategy, nan_replace_
return cramers_v_matrix_value


@pytest.mark.skipif(compare_version("pandas", operator.lt, "1.3.2"), reason="`dython` package requires `pandas>=1.3.2`")
@pytest.mark.parametrize(
"preds, target",
[
@@ -161,7 +162,6 @@ def test_cramers_v_differentiability(self, preds, target, bias_correction, nan_s
)


@pytest.mark.skipif(compare_version("pandas", operator.lt, "1.3.2"), reason="`dython` package requires `pandas>=1.3.2`")
@pytest.mark.parametrize("bias_correction", [False, True])
@pytest.mark.parametrize(("nan_strategy", "nan_replace_value"), [("replace", 1.0), ("drop", None)])
def test_cramers_v_matrix(cramers_matrix_input, bias_correction, nan_strategy, nan_replace_value):
(Diffs for the remaining changed files are not shown.)

