From afe570840a17cad2d039e1aceb8550974719bd2d Mon Sep 17 00:00:00 2001
From: withbest
Date: Mon, 6 Jan 2025 11:14:52 +0100
Subject: [PATCH] chore: remove redundant words in comment (#20510)

Signed-off-by: withbest
---
 docs/source-pytorch/tuning/profiler_intermediate.rst | 4 ++--
 src/lightning/fabric/strategies/deepspeed.py         | 2 +-
 src/lightning/pytorch/core/module.py                 | 2 +-
 src/lightning/pytorch/strategies/deepspeed.py        | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/docs/source-pytorch/tuning/profiler_intermediate.rst b/docs/source-pytorch/tuning/profiler_intermediate.rst
index 802bfc5e6db4e..87aed86ac3653 100644
--- a/docs/source-pytorch/tuning/profiler_intermediate.rst
+++ b/docs/source-pytorch/tuning/profiler_intermediate.rst
@@ -55,7 +55,7 @@ The profiler will generate an output like this:
     Self CPU time total: 1.681ms
 
 .. note::
-    When using the PyTorch Profiler, wall clock time will not not be representative of the true wall clock time.
+    When using the PyTorch Profiler, wall clock time will not be representative of the true wall clock time.
     This is due to forcing profiled operations to be measured synchronously, when many CUDA ops happen asynchronously.
     It is recommended to use this Profiler to find bottlenecks/breakdowns, however for end to end wall clock time use
     the ``SimpleProfiler``.
@@ -142,7 +142,7 @@ This profiler will record ``training_step``, ``validation_step``, ``test_step``,
 The output above shows the profiling for the action ``training_step``.
 
 .. note::
-    When using the PyTorch Profiler, wall clock time will not not be representative of the true wall clock time.
+    When using the PyTorch Profiler, wall clock time will not be representative of the true wall clock time.
     This is due to forcing profiled operations to be measured synchronously, when many CUDA ops happen asynchronously.
     It is recommended to use this Profiler to find bottlenecks/breakdowns, however for end to end wall clock time use
     the ``SimpleProfiler``.
diff --git a/src/lightning/fabric/strategies/deepspeed.py b/src/lightning/fabric/strategies/deepspeed.py
index 1e94fa1166f93..4af5ec65949c9 100644
--- a/src/lightning/fabric/strategies/deepspeed.py
+++ b/src/lightning/fabric/strategies/deepspeed.py
@@ -144,7 +144,7 @@ def __init__(
             nvme_path: Filesystem path for NVMe device for optimizer/parameter state offloading.
 
             optimizer_buffer_count: Number of buffers in buffer pool for optimizer state offloading
-                when ``offload_optimizer_device`` is set to to ``nvme``.
+                when ``offload_optimizer_device`` is set to ``nvme``.
                 This should be at least the number of states maintained per parameter by the optimizer.
                 For example, Adam optimizer has 4 states (parameter, gradient, momentum, and variance).
 
diff --git a/src/lightning/pytorch/core/module.py b/src/lightning/pytorch/core/module.py
index f1d1da924eac4..b8624daac3fa3 100644
--- a/src/lightning/pytorch/core/module.py
+++ b/src/lightning/pytorch/core/module.py
@@ -979,7 +979,7 @@ def configure_optimizers(self) -> OptimizerLRScheduler:
                     # `scheduler.step()`. 1 corresponds to updating the learning
                     # rate after every epoch/step.
                     "frequency": 1,
-                    # Metric to to monitor for schedulers like `ReduceLROnPlateau`
+                    # Metric to monitor for schedulers like `ReduceLROnPlateau`
                     "monitor": "val_loss",
                     # If set to `True`, will enforce that the value specified 'monitor'
                     # is available when the scheduler is updated, thus stopping
diff --git a/src/lightning/pytorch/strategies/deepspeed.py b/src/lightning/pytorch/strategies/deepspeed.py
index e17377d4464b0..d2d380f8788f8 100644
--- a/src/lightning/pytorch/strategies/deepspeed.py
+++ b/src/lightning/pytorch/strategies/deepspeed.py
@@ -166,7 +166,7 @@ def __init__(
             nvme_path: Filesystem path for NVMe device for optimizer/parameter state offloading.
 
             optimizer_buffer_count: Number of buffers in buffer pool for optimizer state offloading
-                when ``offload_optimizer_device`` is set to to ``nvme``.
+                when ``offload_optimizer_device`` is set to ``nvme``.
                 This should be at least the number of states maintained per parameter by the optimizer.
                 For example, Adam optimizer has 4 states (parameter, gradient, momentum, and variance).