Skip to content

Commit

Permalink
Merge branch 'master' into patch-1
Browse files Browse the repository at this point in the history
  • Loading branch information
Borda authored Nov 13, 2024
2 parents 6f94ef8 + cae3335 commit 3184ae4
Show file tree
Hide file tree
Showing 19 changed files with 690 additions and 83 deletions.
32 changes: 16 additions & 16 deletions .github/checkgroup.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ subprojects:
- "!*.md"
- "!**/*.md"
checks:
- "pl-cpu (macOS-13, lightning, 3.9, 2.1, oldest)"
- "pl-cpu (macOS-14, lightning, 3.9, 2.1, oldest)"
- "pl-cpu (macOS-14, lightning, 3.10, 2.1)"
- "pl-cpu (macOS-14, lightning, 3.11, 2.2.2)"
- "pl-cpu (macOS-14, lightning, 3.11, 2.3)"
Expand All @@ -40,9 +40,9 @@ subprojects:
- "pl-cpu (macOS-14, pytorch, 3.9, 2.1)"
- "pl-cpu (ubuntu-20.04, pytorch, 3.9, 2.1)"
- "pl-cpu (windows-2022, pytorch, 3.9, 2.1)"
- "pl-cpu (macOS-13, pytorch, 3.10, 2.1)"
- "pl-cpu (ubuntu-22.04, pytorch, 3.10, 2.1)"
- "pl-cpu (windows-2022, pytorch, 3.10, 2.1)"
- "pl-cpu (macOS-14, pytorch, 3.12, 2.5.1)"
- "pl-cpu (ubuntu-22.04, pytorch, 3.12, 2.5.1)"
- "pl-cpu (windows-2022, pytorch, 3.12, 2.5.1)"

- id: "pytorch_lightning: Azure GPU"
paths:
Expand Down Expand Up @@ -171,7 +171,7 @@ subprojects:
- "!*.md"
- "!**/*.md"
checks:
- "fabric-cpu (macOS-13, lightning, 3.9, 2.1, oldest)"
- "fabric-cpu (macOS-14, lightning, 3.9, 2.1, oldest)"
- "fabric-cpu (macOS-14, lightning, 3.10, 2.1)"
- "fabric-cpu (macOS-14, lightning, 3.11, 2.2.2)"
- "fabric-cpu (macOS-14, lightning, 3.11, 2.3)"
Expand All @@ -192,9 +192,9 @@ subprojects:
- "fabric-cpu (macOS-14, fabric, 3.9, 2.1)"
- "fabric-cpu (ubuntu-20.04, fabric, 3.9, 2.1)"
- "fabric-cpu (windows-2022, fabric, 3.9, 2.1)"
- "fabric-cpu (macOS-13, fabric, 3.10, 2.1)"
- "fabric-cpu (ubuntu-22.04, fabric, 3.10, 2.1)"
- "fabric-cpu (windows-2022, fabric, 3.10, 2.1)"
- "fabric-cpu (macOS-14, fabric, 3.12, 2.5.1)"
- "fabric-cpu (ubuntu-22.04, fabric, 3.12, 2.5.1)"
- "fabric-cpu (windows-2022, fabric, 3.12, 2.5.1)"

- id: "lightning_fabric: Azure GPU"
paths:
Expand Down Expand Up @@ -266,14 +266,14 @@ subprojects:
- "install-pkg (ubuntu-22.04, lightning, 3.11)"
- "install-pkg (ubuntu-22.04, notset, 3.9)"
- "install-pkg (ubuntu-22.04, notset, 3.11)"
- "install-pkg (macOS-13, fabric, 3.9)"
- "install-pkg (macOS-13, fabric, 3.11)"
- "install-pkg (macOS-13, pytorch, 3.9)"
- "install-pkg (macOS-13, pytorch, 3.11)"
- "install-pkg (macOS-13, lightning, 3.9)"
- "install-pkg (macOS-13, lightning, 3.11)"
- "install-pkg (macOS-13, notset, 3.9)"
- "install-pkg (macOS-13, notset, 3.11)"
- "install-pkg (macOS-14, fabric, 3.9)"
- "install-pkg (macOS-14, fabric, 3.11)"
- "install-pkg (macOS-14, pytorch, 3.9)"
- "install-pkg (macOS-14, pytorch, 3.11)"
- "install-pkg (macOS-14, lightning, 3.9)"
- "install-pkg (macOS-14, lightning, 3.11)"
- "install-pkg (macOS-14, notset, 3.9)"
- "install-pkg (macOS-14, notset, 3.11)"
- "install-pkg (windows-2022, fabric, 3.9)"
- "install-pkg (windows-2022, fabric, 3.11)"
- "install-pkg (windows-2022, pytorch, 3.9)"
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ci-pkg-install.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os: ["ubuntu-22.04", "macOS-13", "windows-2022"]
os: ["ubuntu-22.04", "macOS-14", "windows-2022"]
pkg-name: ["fabric", "pytorch", "lightning", "notset"]
python-version: ["3.9", "3.11"]
steps:
Expand Down
13 changes: 8 additions & 5 deletions .github/workflows/ci-tests-fabric.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,11 @@ jobs:
- { os: "ubuntu-22.04", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.5.1" }
- { os: "windows-2022", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.5.1" }
# only run PyTorch latest with Python latest, use Fabric scope to limit dependency issues
- { os: "macOS-13", pkg-name: "fabric", python-version: "3.10", pytorch-version: "2.1" }
- { os: "ubuntu-22.04", pkg-name: "fabric", python-version: "3.10", pytorch-version: "2.1" }
- { os: "windows-2022", pkg-name: "fabric", python-version: "3.10", pytorch-version: "2.1" }
- { os: "macOS-14", pkg-name: "fabric", python-version: "3.12", pytorch-version: "2.5.1" }
- { os: "ubuntu-22.04", pkg-name: "fabric", python-version: "3.12", pytorch-version: "2.5.1" }
- { os: "windows-2022", pkg-name: "fabric", python-version: "3.12", pytorch-version: "2.5.1" }
# "oldest" versions tests, only on minimum Python
- { os: "macOS-13", pkg-name: "lightning", python-version: "3.9", pytorch-version: "2.1", requires: "oldest" }
- { os: "macOS-14", pkg-name: "lightning", python-version: "3.9", pytorch-version: "2.1", requires: "oldest" }
- {
os: "ubuntu-20.04",
pkg-name: "lightning",
Expand Down Expand Up @@ -101,7 +101,10 @@ jobs:

- name: Set min. dependencies
if: ${{ matrix.requires == 'oldest' }}
run: python .actions/assistant.py replace_oldest_ver
run: |
python .actions/assistant.py replace_oldest_ver
pip install "cython<3.0" wheel
pip install "pyyaml==5.4" --no-build-isolation
- name: Adjust PyTorch versions in requirements files
if: ${{ matrix.requires != 'oldest' }}
Expand Down
13 changes: 8 additions & 5 deletions .github/workflows/ci-tests-pytorch.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,11 @@ jobs:
- { os: "ubuntu-22.04", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.5.1" }
- { os: "windows-2022", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.5.1" }
# only run PyTorch latest with Python latest, use PyTorch scope to limit dependency issues
- { os: "macOS-13", pkg-name: "pytorch", python-version: "3.10", pytorch-version: "2.1" }
- { os: "ubuntu-22.04", pkg-name: "pytorch", python-version: "3.10", pytorch-version: "2.1" }
- { os: "windows-2022", pkg-name: "pytorch", python-version: "3.10", pytorch-version: "2.1" }
- { os: "macOS-14", pkg-name: "pytorch", python-version: "3.12", pytorch-version: "2.5.1" }
- { os: "ubuntu-22.04", pkg-name: "pytorch", python-version: "3.12", pytorch-version: "2.5.1" }
- { os: "windows-2022", pkg-name: "pytorch", python-version: "3.12", pytorch-version: "2.5.1" }
# "oldest" versions tests, only on minimum Python
- { os: "macOS-13", pkg-name: "lightning", python-version: "3.9", pytorch-version: "2.1", requires: "oldest" }
- { os: "macOS-14", pkg-name: "lightning", python-version: "3.9", pytorch-version: "2.1", requires: "oldest" }
- {
os: "ubuntu-20.04",
pkg-name: "lightning",
Expand Down Expand Up @@ -106,7 +106,10 @@ jobs:

- name: Set min. dependencies
if: ${{ matrix.requires == 'oldest' }}
run: python .actions/assistant.py replace_oldest_ver
run: |
python .actions/assistant.py replace_oldest_ver
pip install "cython<3.0" wheel
pip install "pyyaml==5.4" --no-build-isolation
- name: Adjust PyTorch versions in requirements files
if: ${{ matrix.requires != 'oldest' }}
Expand Down
2 changes: 1 addition & 1 deletion docs/source-pytorch/levels/expert.rst
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ Customize and extend Lightning for things like custom hardware or distributed st
:header: Level 24: Add a new accelerator or Strategy
:description: Integrate a new accelerator or distributed strategy.
:col_css: col-md-6
:button_link: expert_level_27.html
:button_link: expert_level_24.html
:height: 150
:tag: expert

Expand Down
8 changes: 8 additions & 0 deletions src/lightning/fabric/utilities/throughput.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,14 @@ def measure_flops(
torch.int8: 389.9e12,
"int4": 779.8e12,
},
"rtx 4080 super": {
torch.float32: 52.2e12,
"tfloat32": 52.2e12,
torch.bfloat16: 52.2e12,
torch.float16: 52.2e12,
torch.int8: 417.6e12,
"int4": 835.2e12,
},
"l4": {
torch.float32: 30.3e12,
"tfloat32": 60e12,
Expand Down
4 changes: 2 additions & 2 deletions src/lightning/pytorch/demos/transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def forward(self, x: Tensor) -> Tensor:
# TODO: Could make this a `nn.Parameter` with `requires_grad=False`
self.pe = self._init_pos_encoding(device=x.device)

x = x + self.pe[: x.size(0), :]
x = x + self.pe[:, x.size(1)]
return self.dropout(x)

def _init_pos_encoding(self, device: torch.device) -> Tensor:
Expand All @@ -97,7 +97,7 @@ def _init_pos_encoding(self, device: torch.device) -> Tensor:
div_term = torch.exp(torch.arange(0, self.dim, 2, device=device).float() * (-math.log(10000.0) / self.dim))
pe[:, 0::2] = torch.sin(position * div_term)
pe[:, 1::2] = torch.cos(position * div_term)
pe = pe.unsqueeze(0).transpose(0, 1)
pe = pe.unsqueeze(0)
return pe


Expand Down
2 changes: 1 addition & 1 deletion src/lightning/pytorch/loggers/mlflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,7 @@ def save_dir(self) -> Optional[str]:
"""
if self._tracking_uri.startswith(LOCAL_FILE_URI_PREFIX):
return self._tracking_uri.lstrip(LOCAL_FILE_URI_PREFIX)
return self._tracking_uri[len(LOCAL_FILE_URI_PREFIX) :]
return None

@property
Expand Down
38 changes: 37 additions & 1 deletion src/lightning/pytorch/loops/evaluation_loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import shutil
import sys
from collections import ChainMap, OrderedDict, defaultdict
from dataclasses import dataclass
from typing import Any, DefaultDict, Iterable, Iterator, List, Optional, Tuple, Union

from lightning_utilities.core.apply_func import apply_to_collection
Expand Down Expand Up @@ -45,6 +46,12 @@
from lightning.pytorch.utilities.signature_utils import is_param_in_hook_signature


@dataclass
class RestartStage:
    """Named string constants describing the restart state of an evaluation loop.

    The attributes carry no type annotations, so ``@dataclass`` registers no fields for them; they are
    plain class-level constants that are compared by value.
    """

    # No restart condition is currently flagged.
    NONE = "none"
    # A restart was flagged while an evaluation batch was in flight.
    RESTARTED_MID_EVALUATION = "restarted_mid_evaluation"


class _EvaluationLoop(_Loop):
"""Top-level loop where validation/testing starts."""

Expand Down Expand Up @@ -73,6 +80,7 @@ def __init__(
self._seen_batches_per_dataloader: DefaultDict[int, int] = defaultdict(int)
self._last_val_dl_reload_epoch = float("-inf")
self._module_mode = _ModuleMode()
self._restart_stage = RestartStage.NONE

@property
def num_dataloaders(self) -> int:
Expand Down Expand Up @@ -137,7 +145,7 @@ def run(self) -> List[_OUT_DICT]:
# this needs to wrap the `*_step` call too (not just `next`) for `dataloader_iter` support
break
finally:
self._restarting = False
self.on_iteration_done()
self._store_dataloader_outputs()
return self.on_run_end()

Expand Down Expand Up @@ -197,6 +205,24 @@ def setup_data(self) -> None:
# this depends on the data used, so reset it too
self._seen_batches_per_dataloader = defaultdict(int)

@property
def restarted_mid_evaluation(self) -> bool:
    """Return ``True`` when the stored restart stage is ``RestartStage.RESTARTED_MID_EVALUATION``."""
    current_stage = self._restart_stage
    return current_stage == RestartStage.RESTARTED_MID_EVALUATION

def update_restart_stage(self) -> None:
    """Recompute ``self._restart_stage`` from the restarting flag and the total batch-progress counters.

    The stage becomes ``RestartStage.RESTARTED_MID_EVALUATION`` only when the loop is restarting and the
    totals show exactly one batch that was readied and started but neither processed nor completed.
    In every other case the stage is set to ``RestartStage.NONE``.
    """
    # Not restarting: the counters are irrelevant (and, as in the short-circuiting form, left untouched).
    if not self.restarting:
        self._restart_stage = RestartStage.NONE
        return

    totals = self.batch_progress.total
    one_batch_in_flight = (
        totals.started == totals.ready
        and totals.processed == totals.started - 1
        and totals.completed == totals.processed
    )
    if one_batch_in_flight:
        self._restart_stage = RestartStage.RESTARTED_MID_EVALUATION
    else:
        self._restart_stage = RestartStage.NONE

def reset_restart_stage(self) -> None:
    """Clear any flagged restart condition by storing ``RestartStage.NONE``."""
    self._restart_stage = RestartStage.NONE

def reset(self) -> None:
"""Resets the internal state of the loop."""
trainer = self.trainer
Expand Down Expand Up @@ -236,6 +262,16 @@ def reset(self) -> None:
data_fetcher._stop_profiler = self._on_after_fetch
self._data_fetcher = data_fetcher

def increment_progress_to_evaluation_end(self) -> None:
    """Advance the batch progress by the largest per-dataloader batch limit.

    ``setup_data`` is always called first. If ``self.skip`` is truthy nothing else happens. Otherwise the
    loop is ``reset`` and, unless the largest entry of ``self.max_batches`` converts to ``-1``,
    ``batch_progress.increment_by`` is called with that value and ``True``.
    """
    self.setup_data()
    if not self.skip:
        self.reset()
        # NOTE(review): ``int()`` raises ``OverflowError`` for ``float("inf")`` -- confirm that
        # ``max_batches`` can never hold an infinite limit when this method is reached.
        largest_limit = int(max(self.max_batches))
        # A limit of -1 leaves the progress untouched.
        if largest_limit != -1:
            self.batch_progress.increment_by(largest_limit, True)

def on_run_start(self) -> None:
"""Runs the ``_on_evaluation_model_eval``, ``_on_evaluation_start`` and ``_on_evaluation_epoch_start``
hooks."""
Expand Down
Loading

0 comments on commit 3184ae4

Please sign in to comment.