From bd4060d5b8d17b7ee6d8ce941bb1e2cebac50c81 Mon Sep 17 00:00:00 2001
From: Dorota Toczydlowska <115542912+dorotat-nv@users.noreply.github.com>
Date: Thu, 12 Dec 2024 18:28:33 +0100
Subject: [PATCH] added NeMoLogger unit tests (#511)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds a hotfix and unit tests uncovering an issue with WandbLogger reported in
https://nvidia.slack.com/archives/C074Z808N05/p1733418209959769

## DETAILS

An interesting observation: this genuinely appears to be a wandb bug, since it can only be caught once logger setup is turned into a Lightning hook during training. Only one of the 4 new unit tests fails, when arguably all of them should.

The error is only uncovered after `NeMoLogger` setup, as in
https://github.com/NVIDIA/NeMo/blob/bb895bc4b28ba99d707cb907c4496297a2a7b533/nemo/collections/llm/api.py#L852C22-L856C6
and after `plt.Trainer` setup at the start of training in
https://github.com/Lightning-AI/pytorch-lightning/blob/de7c28ae865b5c9fd3ff21debebb994605f7f420/src/lightning/pytorch/trainer/trainer.py#L944
which is executed in
https://github.com/Lightning-AI/pytorch-lightning/blob/caa9e1e59436913e365bf52eeb2b07e3bf67efac/src/lightning/pytorch/trainer/call.py#L94C1-L97C34
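
To make the "wandb bug" claim concrete: on the affected wandb release, the pydantic-based `wandb.Settings` validates `root_dir` as a string on assignment, so any `pathlib.Path` that reaches it unconverted is rejected regardless of NeMo or Lightning. A minimal sketch of the root cause (the path is illustrative; assumes a wandb version with the pydantic `Settings` shown in the traceback below):

```python
import pathlib

import wandb

settings = wandb.Settings()

# A plain string is accepted.
settings.root_dir = str(pathlib.Path("/tmp/unit-test-loggers"))

try:
    # On affected wandb releases, Settings enables pydantic's validate_assignment and
    # types `root_dir` as str, so assigning a PosixPath raises the error shown below.
    settings.root_dir = pathlib.Path("/tmp/unit-test-loggers")
except Exception as err:  # pydantic_core.ValidationError on affected versions
    print(f"wandb.Settings rejected the Path: {err}")
```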

## TESTING ON BROKEN MAIN BRANCH

```
self = Settings(allow_offline_artifacts=True, allow_val_change=False, anonymous=None, api_key=None, azure_account_url_to_acce...andb', sync_symlink_latest='/workspace/bionemo2/wandb/latest-run', timespec='', wandb_dir='/workspace/bionemo2/wandb/')
name = 'root_dir', value = PosixPath('/tmp/pytest-of-dorotat/pytest-20/test_nemo_logger_initilized0/unit-test-loggers')

    def __setattr__(self, name: str, value: Any) -> None:
        if name in self.__class_vars__:
            raise AttributeError(
                f'{name!r} is a ClassVar of `{self.__class__.__name__}` and cannot be set on an instance. '
                f'If you want to set a value on the class, use `{self.__class__.__name__}.{name} = value`.'
            )
        elif not _fields.is_valid_field_name(name):
            if self.__pydantic_private__ is None or name not in self.__private_attributes__:
                _object_setattr(self, name, value)
            else:
                attribute = self.__private_attributes__[name]
                if hasattr(attribute, '__set__'):
                    attribute.__set__(self, value)  # type: ignore
                else:
                    self.__pydantic_private__[name] = value
            return
        self._check_frozen(name, value)
        attr = getattr(self.__class__, name, None)
        if isinstance(attr, property):
            attr.__set__(self, value)
        elif self.model_config.get('validate_assignment', None):
>           self.__pydantic_validator__.validate_assignment(self, name, value)
E           pydantic_core._pydantic_core.ValidationError: 1 validation error for Settings
E           root_dir
E             Input should be a valid string [type=string_type, input_value=PosixPath('/tmp/pytest-of...zed0/unit-test-loggers'), input_type=PosixPath]
E               For further information visit https://errors.pydantic.dev/2.9/v/string_type

/usr/local/lib/python3.10/dist-packages/pydantic/main.py:881: ValidationError
```
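
The corresponding workaround is to hand wandb a plain `str` wherever the run directory is injected into the `WandbLogger`/`wandb.init` call chain. Below is a hypothetical sketch of the idea at the `wandb.init` level, not necessarily the exact hotfix shipped with this change; the path and project name are illustrative:

```python
import pathlib

import wandb

run_dir = pathlib.Path("/tmp/unit-test-loggers")  # illustrative directory
run_dir.mkdir(parents=True, exist_ok=True)

# Converting the Path to str before wandb builds its Settings avoids the
# string_type ValidationError above; offline mode keeps the sketch from
# talking to the wandb servers.
run = wandb.init(project="test_project", dir=str(run_dir), mode="offline")
run.finish()
```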

---------

Co-authored-by: Steven Kothen-Hill <148821680+skothenhill-nv@users.noreply.github.com>
---
 .../bionemo/llm/utils/test_logger_utils.py | 125 +++++++++++++++++-
 1 file changed, 124 insertions(+), 1 deletion(-)

diff --git a/sub-packages/bionemo-llm/tests/bionemo/llm/utils/test_logger_utils.py b/sub-packages/bionemo-llm/tests/bionemo/llm/utils/test_logger_utils.py
index 8d8c9d5b5c..179ac81b08 100644
--- a/sub-packages/bionemo-llm/tests/bionemo/llm/utils/test_logger_utils.py
+++ b/sub-packages/bionemo-llm/tests/bionemo/llm/utils/test_logger_utils.py
@@ -13,9 +13,132 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from bionemo.llm.utils.logger_utils import setup_nemo_lightning_logger
+import logging
+import pathlib
+
+import pytest
+from lightning.pytorch.loggers import TensorBoardLogger, WandbLogger
+from nemo import lightning as nl
+from nemo.lightning.nemo_logger import NeMoLogger
+
+from bionemo.llm.utils.logger_utils import WandbConfig, setup_nemo_lightning_logger
+
+
+@pytest.fixture
+def project_name() -> str:
+    return "test_project"
+
+
+@pytest.fixture
+def wandb_config(project_name):
+    return WandbConfig(
+        entity=None,
+        project=project_name,
+        tags=["tag1", "tag2"],
+        group="test_group",
+        job_type="test_job",
+        offline=True,  # ensure no actual communication with wandb servers
+        id=None,
+        anonymous=False,
+        log_model=False,
+    )
 
 
 def test_construct_logger_no_wandb():
     logger = setup_nemo_lightning_logger("test")
     assert logger.name == "test"
+
+
+def test_setup_logger_all_loggers(tmp_path, wandb_config, project_name, caplog):
+    # Use a unique experiment name
+    exp_name = "unit-test-loggers"
+    root_dir = tmp_path  # provided by pytest as a temporary directory
+
+    with caplog.at_level(logging.WARNING):
+        logger = setup_nemo_lightning_logger(
+            name=exp_name,
+            root_dir=root_dir,
+            initialize_tensorboard_logger=True,
+            wandb_config=wandb_config,
+            ckpt_callback=None,
+        )
+
+    # Checks on the returned logger
+    assert isinstance(logger, NeMoLogger), "The returned logger should be a NeMoLogger instance."
+    assert logger.name == exp_name
+
+    # Check that directories are set up correctly
+    expected_save_dir = root_dir / exp_name
+    assert logger.save_dir == expected_save_dir, "NeMoLogger save_dir should match expected path."
+    assert not expected_save_dir.exists(), "Expected experiment directory should not be created yet."
+
+    # Check TensorBoard logger initialization
+    tb_logger = logger.tensorboard
+    assert isinstance(tb_logger, TensorBoardLogger), "TensorBoardLogger should be created."
+    tb_log_dir = pathlib.Path(tb_logger.log_dir)
+    assert not tb_log_dir.is_dir(), "TensorBoard log directory should not exist yet."
+    assert tb_logger.name == exp_name, "TensorBoardLogger name should match experiment name."
+
+    # Check WandB logger initialization
+    wandb_logger = logger.wandb
+    assert isinstance(wandb_logger, WandbLogger), "WandBLogger should be created."
+    # Validate that wandb_logger uses correct save_dir and name
+    # WandbLogger's experiment is lazily created, so just check configured values
+    assert wandb_logger.name != exp_name, "WandBLogger name should not match experiment name."
+    assert wandb_logger.name == project_name, "WandBLogger name should match project name."
+    assert pathlib.Path(wandb_logger.save_dir) == expected_save_dir, "WandBLogger save_dir should match expected path."
+    # Since we provided wandb_config and tensorboard was enabled, we should NOT see
+    # the warnings about them being turned off.
+    assert "WandB is currently turned off." not in caplog.text
+    assert "User-set tensorboard is currently turned off." not in caplog.text
+
+
+def test_nemo_logger_initilized(tmp_path, wandb_config, project_name, caplog):
+    # Use a unique experiment name
+    exp_name = "unit-test-loggers"
+    root_dir = tmp_path  # provided by pytest as a temporary directory
+    trainer = nl.Trainer(devices=1, accelerator="gpu", num_nodes=1)
+
+    logger = setup_nemo_lightning_logger(
+        name=exp_name,
+        root_dir=root_dir,
+        initialize_tensorboard_logger=True,
+        wandb_config=wandb_config,
+        ckpt_callback=None,
+    )
+
+    # as in https://github.com/NVIDIA/NeMo/blob/bb895bc4b28ba99d707cb907c4496297a2a7b533/nemo/collections/llm/api.py#L852C22-L856C6
+    logger.setup(trainer=trainer)
+
+    # Check that directories are set up correctly
+    expected_save_dir = root_dir / exp_name
+    assert expected_save_dir.exists(), "Expected experiment directory should be created by logger setup."
+
+    # Check TensorBoard logger initialization
+    tb_logger = logger.tensorboard
+    tb_log_dir = pathlib.Path(tb_logger.log_dir)
+    assert not tb_log_dir.is_dir(), "TensorBoard log directory should not exist yet."
+
+    # Trigger lazy creation of experiment in loggers so loggers have their metadata available,
+    # following trainer setup at the start of training in
+    # https://github.com/Lightning-AI/pytorch-lightning/blob/de7c28ae865b5c9fd3ff21debebb994605f7f420/src/lightning/pytorch/trainer/trainer.py#L944
+    # which executes
+    # https://github.com/Lightning-AI/pytorch-lightning/blob/caa9e1e59436913e365bf52eeb2b07e3bf67efac/src/lightning/pytorch/trainer/call.py#L94C1-L97C34
+    for _logger in trainer.loggers:
+        if hasattr(_logger, "experiment"):
+            _ = _logger.experiment
+
+
+def test_setup_logger_wandb_experiment(tmp_path, wandb_config, project_name, caplog):
+    exp_name = "unit-test-loggers"
+    root_dir = tmp_path  # provided by pytest as a temporary directory
+
+    logger = setup_nemo_lightning_logger(
+        name=exp_name,
+        root_dir=root_dir,
+        initialize_tensorboard_logger=True,
+        wandb_config=wandb_config,
+        ckpt_callback=None,
+    )
+    wandb_logger = logger.wandb
+    _ = wandb_logger.experiment