Skip to content

Commit

Permalink
Merge branch 'master' into todo-b905
Browse files Browse the repository at this point in the history
  • Loading branch information
Borda committed Mar 15, 2024
2 parents 76aaa83 + 6277311 commit d8d41ac
Show file tree
Hide file tree
Showing 292 changed files with 3,822 additions and 2,249 deletions.
6 changes: 3 additions & 3 deletions .azure/gpu-integrations.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ jobs:
torch-ver: "1.13.1"
requires: "oldest"
"torch | 2.x":
docker-image: "pytorch/pytorch:2.2.0-cuda12.1-cudnn8-runtime"
torch-ver: "2.2.0"
docker-image: "pytorch/pytorch:2.2.1-cuda12.1-cudnn8-runtime"
torch-ver: "2.2.1"
# how long to run the job before automatically cancelling
timeoutInMinutes: "40"
# how much time to give 'run always even if cancelled tasks' before stopping them
Expand All @@ -50,7 +50,7 @@ jobs:
echo "##vso[task.setvariable variable=CUDA_VERSION_MM]$CUDA_version_mm"
echo "##vso[task.setvariable variable=TORCH_URL]https://download.pytorch.org/whl/cu${CUDA_version_mm}/torch_stable.html"
# packages for running assistant
pip install -q packaging fire requests wget
pip install -q fire wget packaging
displayName: "set Env. vars"
- bash: |
Expand Down
35 changes: 28 additions & 7 deletions .azure/gpu-unittests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ jobs:
torch-ver: "1.13.1"
"PyTorch | 2.X":
docker-image: "pytorchlightning/torchmetrics:ubuntu22.04-cuda12.1.1-py3.11-torch2.2"
torch-ver: "2.2.0"
torch-ver: "2.2.1"
# how long to run the job before automatically cancelling
timeoutInMinutes: "120"
# how much time to give 'run always even if cancelled tasks' before stopping them
Expand All @@ -45,7 +45,7 @@ jobs:
HF_HUB_CACHE: "/var/tmp/hf/hub"
PIP_CACHE_DIR: "/var/tmp/pip"
# MKL_THREADING_LAYER: "GNU"
MKL_SERVICE_FORCE_INTEL: 1
MKL_SERVICE_FORCE_INTEL: "1"
TEST_DIRS: "unittests"
# todo: consider unfreeze for master too
FREEZE_REQUIREMENTS: 1
Expand All @@ -72,6 +72,7 @@ jobs:
displayName: "set Env. vars for PRs"
- bash: |
pip install -q fire pyGithub
printf "PR: $PR_NUMBER \n"
focus=$(python .github/assistant.py changed-domains $PR_NUMBER)
printf "focus: $focus \n"
Expand Down Expand Up @@ -117,14 +118,21 @@ jobs:
- bash: |
set -e
pip list
python -c "from torch import __version__ as ver ; assert str(ver).split('+')[0] == '$(torch-ver)', f'PyTorch: {ver}'"
python -c "import torch ; mgpu = torch.cuda.device_count() ; assert mgpu >= 2, f'found GPUs: {mgpu}'"
python -c "from torch import __version__ as ver ; assert str(ver).split('+')[0] == '$(torch-ver)', f'PyTorch: installed {ver} but expected $(torch-ver)'"
displayName: "Sanity check"
- bash: |
pip install -q py-tree
py-tree /var/tmp/torch
py-tree /var/tmp/hf
# this gives more than 60k lines and takes a few minutes to run
#py-tree $(PYTEST_REFERENCE_CACHE) --show_hidden
# make sure the cache exists even if it is empty
mkdir -p /var/tmp/cached-references
# copy the cache to the tests folder to be used in the next steps
cp -r /var/tmp/cached-references tests/_cache-references
du -h --max-depth=1 tests/
displayName: "Show caches"
- bash: |
Expand Down Expand Up @@ -152,7 +160,8 @@ jobs:
--reruns 3 --reruns-delay 1
workingDirectory: tests
# skip for PR if there is nothing to test, note that outside PR there is default 'unittests'
condition: ne(variables['TEST_DIRS'], '')
condition: and(succeeded(), ne(variables['TEST_DIRS'], ''))
timeoutInMinutes: "90"
displayName: "UnitTesting common"
- bash: |
Expand All @@ -163,9 +172,19 @@ jobs:
USE_PYTEST_POOL: "1"
workingDirectory: tests
# skip for PR if there is nothing to test, note that outside PR there is default 'unittests'
condition: ne(variables['TEST_DIRS'], '')
condition: and(succeeded(), ne(variables['TEST_DIRS'], ''))
timeoutInMinutes: "90"
displayName: "UnitTesting DDP"
- bash: |
du -h --max-depth=1 tests/
# copy potentially updated cache to the machine filesystem to be reused with next jobs
cp -r --update tests/_cache-references /var/tmp/cached-references
# set as an extra step to not pollute the general cache when a job fails or crashes
# so do this update only with successful jobs on master
condition: and(succeeded(), ne(variables['Build.Reason'], 'PullRequest'))
displayName: "Update cached refs"
- bash: |
python -m coverage report
python -m coverage xml
Expand All @@ -174,7 +193,7 @@ jobs:
ls -l
workingDirectory: tests
# skip for PR if there is nothing to test, note that outside PR there is default 'unittests'
condition: ne(variables['TEST_DIRS'], '')
condition: and(succeeded(), ne(variables['TEST_DIRS'], ''))
displayName: "Statistics"
- bash: |
Expand All @@ -187,10 +206,12 @@ jobs:
done
workingDirectory: examples
# skip for PR if there is nothing to test, note that outside PR there is default 'unittests'
condition: ne(variables['TEST_DIRS'], '')
condition: and(succeeded(), ne(variables['TEST_DIRS'], ''))
displayName: "Examples"
- bash: |
printf "cache location: $(HF_HOME)\n"
ls -lh $(HF_HOME) # show what was restored...
# do not fail if the cache is not present
continueOnError: "true"
displayName: "Show HF artifacts"
16 changes: 14 additions & 2 deletions .github/actions/pull-caches/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ runs:
using: "composite"
steps:
- name: install assistant's deps
run: pip install -q fire requests packaging wget
run: pip install -q packaging fire wget
shell: bash

- name: Set PyTorch version
Expand Down Expand Up @@ -51,7 +51,7 @@ runs:
echo "HF_HUB_CACHE=${cache_dir}${dir_sep}hf-hub" >> $GITHUB_ENV
shell: bash

- name: Cache pip
- name: Cache pip packages
continue-on-error: true
uses: actions/cache/restore@v3
with:
Expand Down Expand Up @@ -80,3 +80,15 @@ runs:
pip install -q py-tree
py-tree $CACHES_DIR
shell: bash

- name: Cache References
continue-on-error: true
uses: actions/cache/restore@v3
with:
path: tests/_cache-references
key: cache-references

- name: Restored References
continue-on-error: true
run: py-tree tests/_cache-references/ --show_hidden
shell: bash
12 changes: 12 additions & 0 deletions .github/actions/push-caches/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -89,3 +89,15 @@ runs:
enableCrossOsArchive: true
path: ${{ env.CACHES_DIR }}
key: ci-caches

- name: Cache references
continue-on-error: true
uses: actions/cache/save@v3
with:
#enableCrossOsArchive: true
path: tests/_cache-references
key: cache-references

- name: Post References
run: py-tree tests/_cache-references/ --show_hidden
shell: bash
28 changes: 5 additions & 23 deletions .github/assistant.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import glob
import json
import logging
import os
import re
import sys
import traceback
from typing import List, Optional, Tuple, Union

import fire
import requests
from packaging.version import parse
from pkg_resources import parse_requirements

Expand All @@ -38,19 +35,6 @@
REQUIREMENTS_FILES = (*glob.glob(_path("requirements", "*.txt")), _path("requirements.txt"))


def request_url(url: str, auth_token: Optional[str] = None) -> Optional[dict]:
"""General request with checking if request limit was reached."""
auth_header = {"Authorization": f"token {auth_token}"} if auth_token else {}
try:
req = requests.get(url, headers=auth_header, timeout=_REQUEST_TIMEOUT)
except requests.exceptions.Timeout:
traceback.print_exc()
return None
if req.status_code == 403:
return None
return json.loads(req.content.decode(req.encoding))


class AssistantCLI:
"""CLI assistant for local CI."""

Expand Down Expand Up @@ -114,15 +98,13 @@ def changed_domains(
general_sub_pkgs: Tuple[str] = _PKG_WIDE_SUBPACKAGES,
) -> Union[str, List[str]]:
"""Determine what domains were changed in particular PR."""
import github

if not pr:
return "unittests"
url = f"https://api.github.com/repos/Lightning-AI/torchmetrics/pulls/{pr}/files"
logging.debug(url)
data = request_url(url, auth_token)
if not data:
logging.debug("WARNING: No data was received -> test everything.")
return "unittests"
files = [d["filename"] for d in data]
gh = github.Github()
pr = gh.get_repo("Lightning-AI/torchmetrics").get_pull(pr)
files = [f.filename for f in pr.get_files()]

# filter out all integrations as they run in a separate suite
files = [fn for fn in files if not fn.startswith("tests/integrations")]
Expand Down
4 changes: 2 additions & 2 deletions .github/mergify.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ pull_request_rules:
- "#check-failure<5"
actions:
label:
add: ["0:] Ready-To-Go"]
add: ["ready"]

- name: Not ready yet
conditions:
Expand All @@ -69,7 +69,7 @@ pull_request_rules:
- "#check-failure>=5"
actions:
label:
remove: ["0:] Ready-To-Go"]
remove: ["ready"]

- name: add core reviewer
conditions:
Expand Down
5 changes: 3 additions & 2 deletions .github/workflows/_focus-diff.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ on:

jobs:
eval-diff:
runs-on: ubuntu-20.04
runs-on: ubuntu-latest
timeout-minutes: 5
# Map the job outputs to step outputs
outputs:
Expand All @@ -26,8 +26,9 @@ jobs:
env:
PR_NUMBER: "${{ github.event.pull_request.number }}"
run: |
set -e
echo $PR_NUMBER
pip install fire requests
pip install -q -U packaging fire pyGithub pyopenssl
# python .github/assistant.py changed-domains $PR_NUMBER
echo "focus=$(python .github/assistant.py changed-domains $PR_NUMBER)" >> $GITHUB_OUTPUT
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/ci-integrate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ jobs:
- { python-version: "3.10", os: "windows" } # todo: https://discuss.pytorch.org/t/numpy-is-not-available-error/146192
include:
- { python-version: "3.10", requires: "latest", os: "ubuntu-22.04" }
- { python-version: "3.10", requires: "latest", os: "macOS-14" } # M1 machine
env:
PYTORCH_URL: "https://download.pytorch.org/whl/cpu/torch_stable.html"
FREEZE_REQUIREMENTS: ${{ ! (github.ref == 'refs/heads/master' || startsWith(github.ref, 'refs/heads/release/')) }}
Expand Down
23 changes: 15 additions & 8 deletions .github/workflows/ci-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,18 +40,24 @@ jobs:
- "1.13.1"
- "2.0.1"
- "2.1.2"
- "2.2.0"
- "2.2.1"
include:
# cover additional python and PR combinations
- { os: "ubuntu-22.04", python-version: "3.8", pytorch-version: "1.13.1" }
- { os: "ubuntu-22.04", python-version: "3.10", pytorch-version: "2.0.1" }
- { os: "ubuntu-22.04", python-version: "3.10", pytorch-version: "2.2.0" }
- { os: "ubuntu-22.04", python-version: "3.11", pytorch-version: "2.2.0" }
- { os: "ubuntu-22.04", python-version: "3.10", pytorch-version: "2.2.1" }
- { os: "ubuntu-22.04", python-version: "3.11", pytorch-version: "2.2.1" }
# standard mac machine, not the M1
- { os: "macOS-12", python-version: "3.8", pytorch-version: "1.13.1" }
- { os: "macOS-12", python-version: "3.10", pytorch-version: "2.0.1" }
- { os: "macOS-12", python-version: "3.11", pytorch-version: "2.2.0" }
- { os: "macOS-12", python-version: "3.11", pytorch-version: "2.2.1" }
# using the ARM based M1 machine
- { os: "macOS-14", python-version: "3.10", pytorch-version: "2.0.1" }
- { os: "macOS-14", python-version: "3.11", pytorch-version: "2.2.1" }
# some windows
- { os: "windows-2022", python-version: "3.8", pytorch-version: "1.13.1" }
- { os: "windows-2022", python-version: "3.10", pytorch-version: "2.0.1" }
- { os: "windows-2022", python-version: "3.11", pytorch-version: "2.2.0" }
- { os: "windows-2022", python-version: "3.11", pytorch-version: "2.2.1" }
env:
PYTORCH_URL: "https://download.pytorch.org/whl/cpu/torch_stable.html"
FREEZE_REQUIREMENTS: ${{ ! (github.ref == 'refs/heads/master' || startsWith(github.ref, 'refs/heads/release/')) }}
Expand All @@ -60,7 +66,7 @@ jobs:
TEST_DIRS: ${{ needs.check-diff.outputs.test-dirs }}

# Timeout: https://stackoverflow.com/a/59076067/4521646
# seems that MacOS jobs take much more than other OS
# seems that macOS jobs take much more than other OS
timeout-minutes: 120

steps:
Expand All @@ -75,6 +81,7 @@ jobs:
if: ${{ runner.os == 'macOS' }}
run: |
echo 'UNITTEST_TIMEOUT=--timeout=75' >> $GITHUB_ENV
brew install mecab # https://github.com/coqui-ai/TTS/issues/1533#issuecomment-1338662303
brew install gcc libomp ffmpeg # https://github.com/pytorch/pytorch/issues/20030
- name: Setup Linux
if: ${{ runner.os == 'Linux' }}
Expand Down Expand Up @@ -162,7 +169,7 @@ jobs:
--reruns-delay 1 \
-m "not DDP" \
-n auto \
--dist=loadfile \
--dist=load \
${{ env.UNITTEST_TIMEOUT }}
- name: Unittests DDP
Expand Down Expand Up @@ -202,7 +209,7 @@ jobs:
fail_ci_if_error: false

- name: update cashing
if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
if: github.event_name != 'pull_request'
continue-on-error: true
uses: ./.github/actions/push-caches
with:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/publish-pkg.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ jobs:
- run: ls -lh dist/
# We do this, since failures on test.pypi aren't that bad
- name: Publish to Test PyPI
uses: pypa/[email protected].11
uses: pypa/[email protected].12
with:
user: __token__
password: ${{ secrets.test_pypi_password }}
Expand All @@ -94,7 +94,7 @@ jobs:
path: dist
- run: ls -lh dist/
- name: Publish distribution 📦 to PyPI
uses: pypa/[email protected].11
uses: pypa/[email protected].12
with:
user: __token__
password: ${{ secrets.pypi_password }}
Expand Down
3 changes: 1 addition & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,8 @@ pip-delete-this-directory.txt
# Unit test / coverage reports
tests/_data/
data.zip
tests/_cache-references/
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
Expand Down
Loading

0 comments on commit d8d41ac

Please sign in to comment.