Skip to content

Commit

Permalink
added NeMoLogger unit tests (#511)
Browse files Browse the repository at this point in the history
Adding hotfix and unit tests uncovering issue with WandbLogger in
https://nvidia.slack.com/archives/C074Z808N05/p1733418209959769

## DETAILS
An interesting observation: this genuinely appears to be a wandb bug,
since it can only be captured once it’s converted into a Lightning hook
during training. Only one out of the 4 new unit tests fails, when probably
all of them should fail.

The error is uncovered only after `NeMoLogger` setup, as in
https://github.com/NVIDIA/NeMo/blob/bb895bc4b28ba99d707cb907c4496297a2a7b533/nemo/collections/llm/api.py#L852C22-L856C6
and after `plt.Trainer` setup at the start of the training in

https://github.com/Lightning-AI/pytorch-lightning/blob/de7c28ae865b5c9fd3ff21debebb994605f7f420/src/lightning/pytorch/trainer/trainer.py#L944
which is executed in

https://github.com/Lightning-AI/pytorch-lightning/blob/caa9e1e59436913e365bf52eeb2b07e3bf67efac/src/lightning/pytorch/trainer/call.py#L94C1-L97C34

## TESTING ON BROKEN MAIN BRANCH


```
self = Settings(allow_offline_artifacts=True, allow_val_change=False, anonymous=None, api_key=None, azure_account_url_to_acce...andb', sync_symlink_latest='/workspace/bionemo2/wandb/latest-run', timespec='', wandb_dir='/workspace/bionemo2/wandb/')
name = 'root_dir', value = PosixPath('/tmp/pytest-of-dorotat/pytest-20/test_nemo_logger_initilized0/unit-test-loggers')

    def __setattr__(self, name: str, value: Any) -> None:
        if name in self.__class_vars__:
            raise AttributeError(
                f'{name!r} is a ClassVar of `{self.__class__.__name__}` and cannot be set on an instance. '
                f'If you want to set a value on the class, use `{self.__class__.__name__}.{name} = value`.'
            )
        elif not _fields.is_valid_field_name(name):
            if self.__pydantic_private__ is None or name not in self.__private_attributes__:
                _object_setattr(self, name, value)
            else:
                attribute = self.__private_attributes__[name]
                if hasattr(attribute, '__set__'):
                    attribute.__set__(self, value)  # type: ignore
                else:
                    self.__pydantic_private__[name] = value
            return
    
        self._check_frozen(name, value)
    
        attr = getattr(self.__class__, name, None)
        if isinstance(attr, property):
            attr.__set__(self, value)
        elif self.model_config.get('validate_assignment', None):
>           self.__pydantic_validator__.validate_assignment(self, name, value)
E           pydantic_core._pydantic_core.ValidationError: 1 validation error for Settings
E           root_dir
E             Input should be a valid string [type=string_type, input_value=PosixPath('/tmp/pytest-of...zed0/unit-test-loggers'), input_type=PosixPath]
E               For further information visit https://errors.pydantic.dev/2.9/v/string_type

/usr/local/lib/python3.10/dist-packages/pydantic/main.py:881: ValidationError
```
<img width="1362" alt="image"
src="https://github.com/user-attachments/assets/cb1f645d-4c1d-4baa-9e5e-474fbb04aa9b">
<img width="1154" alt="image"
src="https://github.com/user-attachments/assets/0cb54012-8d99-44d8-a556-9f6cbf5cd97b">

---------

Co-authored-by: Steven Kothen-Hill <[email protected]>
  • Loading branch information
dorotat-nv and skothenhill-nv authored Dec 12, 2024
1 parent bbc1447 commit bd4060d
Showing 1 changed file with 124 additions and 1 deletion.
125 changes: 124 additions & 1 deletion sub-packages/bionemo-llm/tests/bionemo/llm/utils/test_logger_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,132 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from bionemo.llm.utils.logger_utils import setup_nemo_lightning_logger
import logging
import pathlib

import pytest
from lightning.pytorch.loggers import TensorBoardLogger, WandbLogger
from nemo import lightning as nl
from nemo.lightning.nemo_logger import NeMoLogger

from bionemo.llm.utils.logger_utils import WandbConfig, setup_nemo_lightning_logger


@pytest.fixture
def project_name() -> str:
    """Return the wandb project name shared by the logger tests."""
    name = "test_project"
    return name


@pytest.fixture
def wandb_config(project_name):
    """Build a WandbConfig for tests.

    ``offline=True`` guarantees no actual communication with wandb servers.
    """
    settings = {
        "entity": None,
        "project": project_name,
        "tags": ["tag1", "tag2"],
        "group": "test_group",
        "job_type": "test_job",
        "offline": True,  # ensure no actual communication with wandb servers
        "id": None,
        "anonymous": False,
        "log_model": False,
    }
    return WandbConfig(**settings)


def test_construct_logger_no_wandb():
    """A logger built with only a name should carry that name through."""
    expected_name = "test"
    logger = setup_nemo_lightning_logger(expected_name)
    assert logger.name == expected_name


def test_setup_logger_all_loggers(tmp_path, wandb_config, project_name, caplog):
    """Setting up with both TensorBoard and WandB enabled should configure each
    sub-logger lazily (no directories created yet) and emit no turned-off warnings."""
    experiment = "unit-test-loggers"  # unique experiment name
    base_dir = tmp_path  # provided by pytest as a temporary directory

    with caplog.at_level(logging.WARNING):
        nemo_logger = setup_nemo_lightning_logger(
            name=experiment,
            root_dir=base_dir,
            initialize_tensorboard_logger=True,
            wandb_config=wandb_config,
            ckpt_callback=None,
        )

    # Returned object and its identity.
    assert isinstance(nemo_logger, NeMoLogger), "The returned logger should be a NeMoLogger instance."
    assert nemo_logger.name == experiment

    # Directory layout is configured but not yet materialized on disk.
    save_path = base_dir / experiment
    assert nemo_logger.save_dir == save_path, "NeMoLogger save_dir should match expected path."
    assert not save_path.exists(), "Expected experiment directory should not be created yet."

    # TensorBoard sub-logger.
    tensorboard = nemo_logger.tensorboard
    assert isinstance(tensorboard, TensorBoardLogger), "TensorBoardLogger should be created."
    assert not pathlib.Path(tensorboard.log_dir).is_dir(), "TensorBoard log directory should not exist yet."
    assert tensorboard.name == experiment, "TensorBoardLogger name should match experiment name."

    # WandB sub-logger: its experiment is lazily created, so only the configured
    # values are inspected here. Its name reflects the wandb *project*, not the
    # NeMo experiment name.
    wandb = nemo_logger.wandb
    assert isinstance(wandb, WandbLogger), "WandBLogger should be created."
    assert wandb.name != experiment, "WandBLogger name should not match experiment name."
    assert wandb.name == project_name, "WandBLogger name should match project name."
    assert pathlib.Path(wandb.save_dir) == save_path, "WandBLogger save_dir should match expected path."

    # Both loggers were explicitly enabled, so no "turned off" warnings expected.
    assert "WandB is currently turned off." not in caplog.text
    assert "User-set tensorboard is currently turned off." not in caplog.text


def test_nemo_logger_initilized(tmp_path, wandb_config, project_name, caplog):
    """After ``NeMoLogger.setup(trainer=...)`` the experiment directory exists,
    and lazily touching each logger's ``experiment`` reproduces the hook Lightning
    runs at training start — this is where the wandb/pydantic bug surfaces.

    NOTE(review): function name has a typo ("initilized"); kept to preserve the
    test's discovered identity.
    """
    # Use a unique experiment name
    exp_name = "unit-test-loggers"
    root_dir = tmp_path  # provided by pytest as a temporary directory
    trainer = nl.Trainer(devices=1, accelerator="gpu", num_nodes=1)

    logger = setup_nemo_lightning_logger(
        name=exp_name,
        root_dir=root_dir,
        initialize_tensorboard_logger=True,
        wandb_config=wandb_config,
        ckpt_callback=None,
    )

    # as in https://github.com/NVIDIA/NeMo/blob/bb895bc4b28ba99d707cb907c4496297a2a7b533/nemo/collections/llm/api.py#L852C22-L856C6
    logger.setup(trainer=trainer)

    # Check that directories are set up correctly. Fix: the original assertion
    # message said "should not be created yet", contradicting the asserted
    # condition — after setup() the directory IS expected to exist.
    expected_save_dir = root_dir / exp_name
    assert expected_save_dir.exists(), "Expected experiment directory should be created after logger setup."

    # Check TensorBoard logger initialization; its log dir is still lazy at this point.
    tb_logger = logger.tensorboard
    tb_log_dir = pathlib.Path(tb_logger.log_dir)
    assert not tb_log_dir.is_dir(), "TensorBoard log directory should not exist yet."

    # Trigger lazy creation of experiment in loggers so loggers have their metadata available
    # following trainer setup at the start of the training in
    # https://github.com/Lightning-AI/pytorch-lightning/blob/de7c28ae865b5c9fd3ff21debebb994605f7f420/src/lightning/pytorch/trainer/trainer.py#L944
    # which executes
    # https://github.com/Lightning-AI/pytorch-lightning/blob/caa9e1e59436913e365bf52eeb2b07e3bf67efac/src/lightning/pytorch/trainer/call.py#L94C1-L97C34
    for _logger in trainer.loggers:
        if hasattr(_logger, "experiment"):
            _ = _logger.experiment


def test_setup_logger_wandb_experiment(tmp_path, wandb_config, project_name, caplog):
    """Force the lazy creation of the WandB experiment right after logger setup."""
    kwargs = {
        "name": "unit-test-loggers",
        "root_dir": tmp_path,  # provided by pytest as a temporary directory
        "initialize_tensorboard_logger": True,
        "wandb_config": wandb_config,
        "ckpt_callback": None,
    }
    nemo_logger = setup_nemo_lightning_logger(**kwargs)
    # Accessing .experiment triggers wandb initialization.
    _ = nemo_logger.wandb.experiment

0 comments on commit bd4060d

Please sign in to comment.