Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ci/gpu: debugging & shielding ref. cache #2447

Merged
merged 9 commits into from
Mar 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 16 additions & 2 deletions .azure/gpu-unittests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ jobs:
TEST_DIRS: "unittests"
# todo: consider unfreeze for master too
FREEZE_REQUIREMENTS: 1
PYTEST_REFERENCE_CACHE: "/var/tmp/cache-references"

container:
image: "$(docker-image)"
Expand Down Expand Up @@ -127,7 +126,13 @@ jobs:
pip install -q py-tree
py-tree /var/tmp/torch
py-tree /var/tmp/hf
py-tree $(PYTEST_REFERENCE_CACHE) --show_hidden
# this prints more than 60k lines and takes a few minutes to run
#py-tree $(PYTEST_REFERENCE_CACHE) --show_hidden
# make sure the cache exists even if it is empty
mkdir -p /var/tmp/cached-references
# copy the cache to the tests folder to be used in the next steps
cp -r /var/tmp/cached-references tests/_cache-references
du -h --max-depth=1 tests/
displayName: "Show caches"

- bash: |
Expand Down Expand Up @@ -156,6 +161,7 @@ jobs:
workingDirectory: tests
# skip for PR if there is nothing to test, note that outside PR there is default 'unittests'
condition: and(succeeded(), ne(variables['TEST_DIRS'], ''))
timeoutInMinutes: "60"
displayName: "UnitTesting common"

- bash: |
Expand All @@ -167,8 +173,16 @@ jobs:
workingDirectory: tests
# skip for PR if there is nothing to test, note that outside PR there is default 'unittests'
condition: and(succeeded(), ne(variables['TEST_DIRS'], ''))
timeoutInMinutes: "60"
displayName: "UnitTesting DDP"

- bash: |
    du -h --max-depth=1 tests/
    # make sure the destination exists so the copy below merges into it
    mkdir -p /var/tmp/cached-references
    # copy the potentially updated cache back to the machine filesystem to be reused by next jobs;
    # the trailing "/." copies the directory *contents* - with a plain directory source `cp -r`
    # would create a nested /var/tmp/cached-references/_cache-references on every run
    # instead of refreshing the shared cache itself
    cp -r --update tests/_cache-references/. /var/tmp/cached-references/
  # kept as a separate step so the shared cache is not polluted when a job fails or crashes
  displayName: "Update cached refs"

- bash: |
python -m coverage report
python -m coverage xml
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ci-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ jobs:
--reruns-delay 1 \
-m "not DDP" \
-n auto \
--dist=loadfile \
--dist=load \
${{ env.UNITTEST_TIMEOUT }}

- name: Unittests DDP
Expand Down
6 changes: 3 additions & 3 deletions requirements/_doctest.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# NOTE: the upper bound for the package version is only set for CI stability, and it is dropped while installing this package
# in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment

pytest >=8.0.0, <8.1.0
pytest-doctestplus >1.0, <1.3
pytest-rerunfailures >10.0, <14.0
pytest >=8.0, <9.0
pytest-doctestplus >=1.0, <1.3
pytest-rerunfailures >=10.0, <14.0
2 changes: 1 addition & 1 deletion requirements/_tests.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment

coverage ==7.4.3
pytest ==8.0.0
pytest ==8.1.1
pytest-cov ==4.1.0
pytest-doctestplus ==1.2.1
pytest-rerunfailures ==13.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import numpy
import torch

from unittests.helpers.wrappers import skip_on_connection_issues, skip_on_running_out_of_memory
from unittests._helpers.wrappers import skip_on_connection_issues, skip_on_running_out_of_memory


def seed_all(seed):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,18 @@ def _assert_dtype_support(
_assert_tensor(metric_functional(y_hat, y, **kwargs_update))


def _select_rand_best_device() -> str:
    """Return the name of the device the tests should run on.

    Returns:
        ``"cuda"`` when ``torch.cuda.device_count()`` reports at least one GPU,
        otherwise ``"cpu"``.

    """
    # todo: random selection among several GPUs is disabled until the device
    #  checks/asserts are debugged; the intended variant was:
    #    if gpu_count > 1:
    #        from random import randrange
    #        return f"cuda:{randrange(gpu_count)}"
    gpu_count = torch.cuda.device_count()
    return "cuda" if gpu_count else "cpu"


class MetricTester:
"""Test class for all metrics.

Expand Down Expand Up @@ -371,16 +383,14 @@ def run_functional_metric_test(
target when running update on the metric.

"""
device = "cuda" if (torch.cuda.is_available() and torch.cuda.device_count() > 0) else "cpu"

_functional_test(
preds=preds,
target=target,
metric_functional=metric_functional,
reference_metric=reference_metric,
metric_args=metric_args,
atol=self.atol,
device=device,
device=_select_rand_best_device(),
fragment_kwargs=fragment_kwargs,
**kwargs_update,
)
Expand Down Expand Up @@ -431,7 +441,7 @@ def run_class_metric_test(
"reference_metric": reference_metric,
"metric_args": metric_args or {},
"atol": atol or self.atol,
"device": "cuda" if torch.cuda.is_available() else "cpu",
"device": _select_rand_best_device(),
"dist_sync_on_step": dist_sync_on_step,
"check_dist_sync_on_step": check_dist_sync_on_step,
"check_batch": check_batch,
Expand Down
4 changes: 2 additions & 2 deletions tests/unittests/audio/test_c_si_snr.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@
from torchmetrics.functional.audio import complex_scale_invariant_signal_noise_ratio

from unittests import BATCH_SIZE, NUM_BATCHES, _Input
from unittests._helpers import seed_all
from unittests._helpers.testers import MetricTester
from unittests.audio import _SAMPLE_AUDIO_SPEECH, _SAMPLE_AUDIO_SPEECH_BAB_DB
from unittests.helpers import seed_all
from unittests.helpers.testers import MetricTester

seed_all(42)

Expand Down
4 changes: 2 additions & 2 deletions tests/unittests/audio/test_pesq.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@
from torchmetrics.functional.audio import perceptual_evaluation_speech_quality

from unittests import _Input
from unittests._helpers import seed_all
from unittests._helpers.testers import MetricTester
from unittests.audio import _SAMPLE_AUDIO_SPEECH, _SAMPLE_AUDIO_SPEECH_BAB_DB, _average_metric_wrapper
from unittests.helpers import seed_all
from unittests.helpers.testers import MetricTester

seed_all(42)

Expand Down
4 changes: 2 additions & 2 deletions tests/unittests/audio/test_pit.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@
)

from unittests import BATCH_SIZE, NUM_BATCHES, _Input
from unittests._helpers import seed_all
from unittests._helpers.testers import MetricTester
from unittests.audio import _average_metric_wrapper
from unittests.helpers import seed_all
from unittests.helpers.testers import MetricTester

seed_all(42)

Expand Down
4 changes: 2 additions & 2 deletions tests/unittests/audio/test_sa_sdr.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
)

from unittests import BATCH_SIZE, NUM_BATCHES, _Input
from unittests.helpers import seed_all
from unittests.helpers.testers import MetricTester
from unittests._helpers import seed_all
from unittests._helpers.testers import MetricTester

seed_all(42)

Expand Down
4 changes: 2 additions & 2 deletions tests/unittests/audio/test_sdr.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@
from torchmetrics.utilities.imports import _TORCH_GREATER_EQUAL_1_11

from unittests import _Input
from unittests._helpers import seed_all
from unittests._helpers.testers import MetricTester
from unittests.audio import _SAMPLE_AUDIO_SPEECH, _SAMPLE_AUDIO_SPEECH_BAB_DB, _SAMPLE_NUMPY_ISSUE_895
from unittests.helpers import seed_all
from unittests.helpers.testers import MetricTester

seed_all(42)

Expand Down
4 changes: 2 additions & 2 deletions tests/unittests/audio/test_si_sdr.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@
from torchmetrics.functional.audio import scale_invariant_signal_distortion_ratio

from unittests import BATCH_SIZE, NUM_BATCHES, _Input
from unittests._helpers import seed_all
from unittests._helpers.testers import MetricTester
from unittests.audio import _average_metric_wrapper
from unittests.helpers import seed_all
from unittests.helpers.testers import MetricTester

seed_all(42)

Expand Down
4 changes: 2 additions & 2 deletions tests/unittests/audio/test_si_snr.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
from torchmetrics.functional.audio import scale_invariant_signal_noise_ratio

from unittests import BATCH_SIZE, NUM_BATCHES, _Input
from unittests.helpers import seed_all
from unittests.helpers.testers import MetricTester
from unittests._helpers import seed_all
from unittests._helpers.testers import MetricTester

seed_all(42)

Expand Down
4 changes: 2 additions & 2 deletions tests/unittests/audio/test_snr.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@
from torchmetrics.functional.audio import signal_noise_ratio

from unittests import _Input
from unittests._helpers import seed_all
from unittests._helpers.testers import MetricTester
from unittests.audio import _average_metric_wrapper
from unittests.helpers import seed_all
from unittests.helpers.testers import MetricTester

seed_all(42)

Expand Down
4 changes: 2 additions & 2 deletions tests/unittests/audio/test_srmr.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@
from torchmetrics.functional.audio.srmr import speech_reverberation_modulation_energy_ratio
from torchmetrics.utilities.imports import _TORCHAUDIO_GREATER_EQUAL_0_10

from unittests.helpers import seed_all
from unittests.helpers.testers import MetricTester
from unittests._helpers import seed_all
from unittests._helpers.testers import MetricTester

seed_all(42)

Expand Down
4 changes: 2 additions & 2 deletions tests/unittests/audio/test_stoi.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@
from torchmetrics.functional.audio import short_time_objective_intelligibility

from unittests import _Input
from unittests._helpers import seed_all
from unittests._helpers.testers import MetricTester
from unittests.audio import _SAMPLE_AUDIO_SPEECH, _SAMPLE_AUDIO_SPEECH_BAB_DB, _average_metric_wrapper
from unittests.helpers import seed_all
from unittests.helpers.testers import MetricTester

seed_all(42)

Expand Down
2 changes: 1 addition & 1 deletion tests/unittests/bases/test_aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from torchmetrics.collections import MetricCollection

from unittests import BATCH_SIZE, NUM_BATCHES
from unittests.helpers.testers import MetricTester
from unittests._helpers.testers import MetricTester


def compare_mean(values, weights):
Expand Down
4 changes: 2 additions & 2 deletions tests/unittests/bases/test_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@
)
from torchmetrics.utilities.checks import _allclose_recursive

from unittests.helpers import seed_all
from unittests.helpers.testers import DummyMetricDiff, DummyMetricMultiOutputDict, DummyMetricSum
from unittests._helpers import seed_all
from unittests._helpers.testers import DummyMetricDiff, DummyMetricMultiOutputDict, DummyMetricSum

seed_all(42)

Expand Down
4 changes: 2 additions & 2 deletions tests/unittests/bases/test_ddp.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
from torchmetrics.utilities.exceptions import TorchMetricsUserError

from unittests import NUM_PROCESSES
from unittests.helpers import seed_all
from unittests.helpers.testers import DummyListMetric, DummyMetric, DummyMetricSum
from unittests._helpers import seed_all
from unittests._helpers.testers import DummyListMetric, DummyMetric, DummyMetricSum

seed_all(42)

Expand Down
2 changes: 1 addition & 1 deletion tests/unittests/bases/test_hashing.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pytest

from unittests.helpers.testers import DummyListMetric, DummyMetric
from unittests._helpers.testers import DummyListMetric, DummyMetric


@pytest.mark.parametrize(
Expand Down
4 changes: 2 additions & 2 deletions tests/unittests/bases/test_metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@
from torchmetrics.classification import BinaryAccuracy
from torchmetrics.regression import PearsonCorrCoef

from unittests.helpers import seed_all
from unittests.helpers.testers import DummyListMetric, DummyMetric, DummyMetricMultiOutput, DummyMetricSum
from unittests._helpers import seed_all
from unittests._helpers.testers import DummyListMetric, DummyMetric, DummyMetricMultiOutput, DummyMetricSum

seed_all(42)

Expand Down
2 changes: 1 addition & 1 deletion tests/unittests/classification/_inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from torch import Tensor

from unittests import BATCH_SIZE, EXTRA_DIM, NUM_BATCHES, NUM_CLASSES, _GroupInput, _Input
from unittests.helpers import seed_all
from unittests._helpers import seed_all

seed_all(1)

Expand Down
4 changes: 2 additions & 2 deletions tests/unittests/classification/test_accuracy.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@
from torchmetrics.metric import Metric

from unittests import NUM_CLASSES, THRESHOLD
from unittests._helpers import seed_all
from unittests._helpers.testers import MetricTester, inject_ignore_index, remove_ignore_index
from unittests.classification._inputs import _binary_cases, _input_binary, _multiclass_cases, _multilabel_cases
from unittests.helpers import seed_all
from unittests.helpers.testers import MetricTester, inject_ignore_index, remove_ignore_index

seed_all(42)

Expand Down
4 changes: 2 additions & 2 deletions tests/unittests/classification/test_auroc.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@
from torchmetrics.metric import Metric

from unittests import NUM_CLASSES
from unittests._helpers import seed_all
from unittests._helpers.testers import MetricTester, inject_ignore_index, remove_ignore_index
from unittests.classification._inputs import _binary_cases, _multiclass_cases, _multilabel_cases
from unittests.helpers import seed_all
from unittests.helpers.testers import MetricTester, inject_ignore_index, remove_ignore_index

seed_all(42)

Expand Down
4 changes: 2 additions & 2 deletions tests/unittests/classification/test_average_precision.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@
from torchmetrics.metric import Metric

from unittests import NUM_CLASSES
from unittests._helpers import seed_all
from unittests._helpers.testers import MetricTester, inject_ignore_index, remove_ignore_index
from unittests.classification._inputs import _binary_cases, _multiclass_cases, _multilabel_cases
from unittests.helpers import seed_all
from unittests.helpers.testers import MetricTester, inject_ignore_index, remove_ignore_index

seed_all(42)

Expand Down
4 changes: 2 additions & 2 deletions tests/unittests/classification/test_calibration_error.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@
from torchmetrics.utilities.imports import _TORCH_GREATER_EQUAL_1_13

from unittests import NUM_CLASSES
from unittests._helpers import seed_all
from unittests._helpers.testers import MetricTester, inject_ignore_index, remove_ignore_index
from unittests.classification._inputs import _binary_cases, _multiclass_cases
from unittests.helpers import seed_all
from unittests.helpers.testers import MetricTester, inject_ignore_index, remove_ignore_index

seed_all(42)

Expand Down
4 changes: 2 additions & 2 deletions tests/unittests/classification/test_cohen_kappa.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@
from torchmetrics.metric import Metric

from unittests import NUM_CLASSES, THRESHOLD
from unittests._helpers import seed_all
from unittests._helpers.testers import MetricTester, inject_ignore_index, remove_ignore_index
from unittests.classification._inputs import _binary_cases, _multiclass_cases
from unittests.helpers import seed_all
from unittests.helpers.testers import MetricTester, inject_ignore_index, remove_ignore_index

seed_all(42)

Expand Down
4 changes: 2 additions & 2 deletions tests/unittests/classification/test_confusion_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@
from torchmetrics.metric import Metric

from unittests import NUM_CLASSES, THRESHOLD
from unittests._helpers import seed_all
from unittests._helpers.testers import MetricTester, inject_ignore_index, remove_ignore_index
from unittests.classification._inputs import _binary_cases, _multiclass_cases, _multilabel_cases
from unittests.helpers import seed_all
from unittests.helpers.testers import MetricTester, inject_ignore_index, remove_ignore_index

seed_all(42)

Expand Down
4 changes: 2 additions & 2 deletions tests/unittests/classification/test_dice.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
from torchmetrics.utilities.checks import _input_format_classification
from torchmetrics.utilities.enums import DataType

from unittests._helpers import seed_all
from unittests._helpers.testers import MetricTester
from unittests.classification._inputs import _input_binary, _input_binary_logits, _input_binary_prob
from unittests.classification._inputs import _input_multiclass as _input_mcls
from unittests.classification._inputs import _input_multiclass_logits as _input_mcls_logits
Expand All @@ -33,8 +35,6 @@
from unittests.classification._inputs import _input_multilabel_multidim as _input_mlmd
from unittests.classification._inputs import _input_multilabel_multidim_prob as _input_mlmd_prob
from unittests.classification._inputs import _input_multilabel_prob as _input_mlb_prob
from unittests.helpers import seed_all
from unittests.helpers.testers import MetricTester

seed_all(42)

Expand Down
Loading
Loading