From bd4060d5b8d17b7ee6d8ce941bb1e2cebac50c81 Mon Sep 17 00:00:00 2001
From: Dorota Toczydlowska <115542912+dorotat-nv@users.noreply.github.com>
Date: Thu, 12 Dec 2024 18:28:33 +0100
Subject: [PATCH] added NeMoLogger unit tests (#511)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Adding hotfix and unit tests uncovering issue with WandbLogger in
https://nvidia.slack.com/archives/C074Z808N05/p1733418209959769
## DETAILS
An interesting observation: this genuinely appears to be a wandb bug,
since it can only be captured once it’s converted into a Lightning hook
during training. Only one out of 4 new unit tests fails when probably
all of them should
The error is only uncovered only after `NeMoLogger` setup as n
https://github.com/NVIDIA/NeMo/blob/bb895bc4b28ba99d707cb907c4496297a2a7b533/nemo/collections/llm/api.py#L852C22-L856C6
and after `plt.Trainer` setup at the start of the training in
https://github.com/Lightning-AI/pytorch-lightning/blob/de7c28ae865b5c9fd3ff21debebb994605f7f420/src/lightning/pytorch/trainer/trainer.py#L944
which is executed in
https://github.com/Lightning-AI/pytorch-lightning/blob/caa9e1e59436913e365bf52eeb2b07e3bf67efac/src/lightning/pytorch/trainer/call.py#L94C1-L97C34
## TESTING ON BROKEN MAIN BRANCH
```
self = Settings(allow_offline_artifacts=True, allow_val_change=False, anonymous=None, api_key=None, azure_account_url_to_acce...andb', sync_symlink_latest='/workspace/bionemo2/wandb/latest-run', timespec='', wandb_dir='/workspace/bionemo2/wandb/')
name = 'root_dir', value = PosixPath('/tmp/pytest-of-dorotat/pytest-20/test_nemo_logger_initilized0/unit-test-loggers')
def __setattr__(self, name: str, value: Any) -> None:
if name in self.__class_vars__:
raise AttributeError(
f'{name!r} is a ClassVar of `{self.__class__.__name__}` and cannot be set on an instance. '
f'If you want to set a value on the class, use `{self.__class__.__name__}.{name} = value`.'
)
elif not _fields.is_valid_field_name(name):
if self.__pydantic_private__ is None or name not in self.__private_attributes__:
_object_setattr(self, name, value)
else:
attribute = self.__private_attributes__[name]
if hasattr(attribute, '__set__'):
attribute.__set__(self, value) # type: ignore
else:
self.__pydantic_private__[name] = value
return
self._check_frozen(name, value)
attr = getattr(self.__class__, name, None)
if isinstance(attr, property):
attr.__set__(self, value)
elif self.model_config.get('validate_assignment', None):
> self.__pydantic_validator__.validate_assignment(self, name, value)
E pydantic_core._pydantic_core.ValidationError: 1 validation error for Settings
E root_dir
E Input should be a valid string [type=string_type, input_value=PosixPath('/tmp/pytest-of...zed0/unit-test-loggers'), input_type=PosixPath]
E For further information visit https://errors.pydantic.dev/2.9/v/string_type
/usr/local/lib/python3.10/dist-packages/pydantic/main.py:881: ValidationError
```
---------
Co-authored-by: Steven Kothen-Hill <148821680+skothenhill-nv@users.noreply.github.com>
---
.../bionemo/llm/utils/test_logger_utils.py | 125 +++++++++++++++++-
1 file changed, 124 insertions(+), 1 deletion(-)
diff --git a/sub-packages/bionemo-llm/tests/bionemo/llm/utils/test_logger_utils.py b/sub-packages/bionemo-llm/tests/bionemo/llm/utils/test_logger_utils.py
index 8d8c9d5b5c..179ac81b08 100644
--- a/sub-packages/bionemo-llm/tests/bionemo/llm/utils/test_logger_utils.py
+++ b/sub-packages/bionemo-llm/tests/bionemo/llm/utils/test_logger_utils.py
@@ -13,9 +13,132 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from bionemo.llm.utils.logger_utils import setup_nemo_lightning_logger
+import logging
+import pathlib
+
+import pytest
+from lightning.pytorch.loggers import TensorBoardLogger, WandbLogger
+from nemo import lightning as nl
+from nemo.lightning.nemo_logger import NeMoLogger
+
+from bionemo.llm.utils.logger_utils import WandbConfig, setup_nemo_lightning_logger
+
+
+@pytest.fixture
+def project_name() -> str:
+ return "test_project"
+
+
+@pytest.fixture
+def wandb_config(project_name):
+ return WandbConfig(
+ entity=None,
+ project=project_name,
+ tags=["tag1", "tag2"],
+ group="test_group",
+ job_type="test_job",
+ offline=True, # ensure no actual communication with wandb servers
+ id=None,
+ anonymous=False,
+ log_model=False,
+ )
def test_construct_logger_no_wandb():
logger = setup_nemo_lightning_logger("test")
assert logger.name == "test"
+
+
+def test_setup_logger_all_loggers(tmp_path, wandb_config, project_name, caplog):
+ # Use a unique experiment name
+ exp_name = "unit-test-loggers"
+ root_dir = tmp_path # provided by pytest as a temporary directory
+
+ with caplog.at_level(logging.WARNING):
+ logger = setup_nemo_lightning_logger(
+ name=exp_name,
+ root_dir=root_dir,
+ initialize_tensorboard_logger=True,
+ wandb_config=wandb_config,
+ ckpt_callback=None,
+ )
+
+ # Checks on the returned logger
+ assert isinstance(logger, NeMoLogger), "The returned logger should be a NeMoLogger instance."
+ assert logger.name == exp_name
+
+ # Check that directories are set up correctly
+ expected_save_dir = root_dir / exp_name
+ assert logger.save_dir == expected_save_dir, "NeMoLogger save_dir should match expected path."
+ assert not expected_save_dir.exists(), "Expected experiment directory should not be created yet."
+
+ # Check TensorBoard logger initialization
+ tb_logger = logger.tensorboard
+ assert isinstance(tb_logger, TensorBoardLogger), "TensorBoardLogger should be created."
+ tb_log_dir = pathlib.Path(tb_logger.log_dir)
+ assert not tb_log_dir.is_dir(), "TensorBoard log directory should not exist yet."
+ assert tb_logger.name == exp_name, "TensorBoardLogger name should match experiment name."
+
+ # Check WandB logger initialization
+ wandb_logger = logger.wandb
+ assert isinstance(wandb_logger, WandbLogger), "WandBLogger should be created."
+ # Validate that wandb_logger uses correct save_dir and name
+ # WandbLogger's experiment is lazily created, so just check configured values
+ assert wandb_logger.name != exp_name, "WandBLogger name should not match experiment name."
+ assert wandb_logger.name == project_name, "WandBLogger name should match project name."
+ assert pathlib.Path(wandb_logger.save_dir) == expected_save_dir, "WandBLogger save_dir should match expected path."
+ # Since we provided wandb_config and tensorboard was enabled, we should NOT see
+ # the warnings about them being turned off.
+ assert "WandB is currently turned off." not in caplog.text
+ assert "User-set tensorboard is currently turned off." not in caplog.text
+
+
+def test_nemo_logger_initilized(tmp_path, wandb_config, project_name, caplog):
+ # Use a unique experiment name
+ exp_name = "unit-test-loggers"
+ root_dir = tmp_path # provided by pytest as a temporary directory
+ trainer = nl.Trainer(devices=1, accelerator="gpu", num_nodes=1)
+
+ logger = setup_nemo_lightning_logger(
+ name=exp_name,
+ root_dir=root_dir,
+ initialize_tensorboard_logger=True,
+ wandb_config=wandb_config,
+ ckpt_callback=None,
+ )
+
+ # as in https://github.com/NVIDIA/NeMo/blob/bb895bc4b28ba99d707cb907c4496297a2a7b533/nemo/collections/llm/api.py#L852C22-L856C6
+ logger.setup(trainer=trainer)
+
+ # Check that directories are set up correctly
+ expected_save_dir = root_dir / exp_name
+ assert expected_save_dir.exists(), "Expected experiment directory should not be created yet."
+
+ # Check TensorBoard logger initialization
+ tb_logger = logger.tensorboard
+ tb_log_dir = pathlib.Path(tb_logger.log_dir)
+ assert not tb_log_dir.is_dir(), "TensorBoard log directory should not exist yet."
+
+ # Trigger lazy creation of experiment in loggers so loggers have their metadata available
+ # following trainer setup at the start of the training in
+ # https://github.com/Lightning-AI/pytorch-lightning/blob/de7c28ae865b5c9fd3ff21debebb994605f7f420/src/lightning/pytorch/trainer/trainer.py#L944
+ # which executes
+ # https://github.com/Lightning-AI/pytorch-lightning/blob/caa9e1e59436913e365bf52eeb2b07e3bf67efac/src/lightning/pytorch/trainer/call.py#L94C1-L97C34
+ for _logger in trainer.loggers:
+ if hasattr(_logger, "experiment"):
+ _ = _logger.experiment
+
+
+def test_setup_logger_wandb_experiment(tmp_path, wandb_config, project_name, caplog):
+ exp_name = "unit-test-loggers"
+ root_dir = tmp_path # provided by pytest as a temporary directory
+
+ logger = setup_nemo_lightning_logger(
+ name=exp_name,
+ root_dir=root_dir,
+ initialize_tensorboard_logger=True,
+ wandb_config=wandb_config,
+ ckpt_callback=None,
+ )
+ wandb_logger = logger.wandb
+ _ = wandb_logger.experiment