From bd4060d5b8d17b7ee6d8ce941bb1e2cebac50c81 Mon Sep 17 00:00:00 2001
From: Dorota Toczydlowska <115542912+dorotat-nv@users.noreply.github.com>
Date: Thu, 12 Dec 2024 18:28:33 +0100
Subject: [PATCH] added NeMoLogger unit tests (#511)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds a hotfix and unit tests uncovering an issue with WandbLogger reported in
https://nvidia.slack.com/archives/C074Z808N05/p1733418209959769

## DETAILS

An interesting observation: this genuinely appears to be a wandb bug, since it can only be caught once logger setup is turned into a Lightning hook during training. Only one of the 4 new unit tests fails, when arguably all of them should.

The error is only uncovered after `NeMoLogger` setup, as in
https://github.com/NVIDIA/NeMo/blob/bb895bc4b28ba99d707cb907c4496297a2a7b533/nemo/collections/llm/api.py#L852C22-L856C6
and after `plt.Trainer` setup at the start of training in
https://github.com/Lightning-AI/pytorch-lightning/blob/de7c28ae865b5c9fd3ff21debebb994605f7f420/src/lightning/pytorch/trainer/trainer.py#L944
which is executed in
https://github.com/Lightning-AI/pytorch-lightning/blob/caa9e1e59436913e365bf52eeb2b07e3bf67efac/src/lightning/pytorch/trainer/call.py#L94C1-L97C34
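
To make the "wandb bug" claim concrete: on the affected wandb release, the pydantic-based `wandb.Settings` validates `root_dir` as a string on assignment, so any `pathlib.Path` that reaches it unconverted is rejected regardless of NeMo or Lightning. A minimal sketch of the root cause (the path is illustrative; assumes a wandb version with the pydantic `Settings` shown in the traceback below):

```python
import pathlib

import wandb

settings = wandb.Settings()

# A plain string is accepted.
settings.root_dir = str(pathlib.Path("/tmp/unit-test-loggers"))

try:
    # On affected wandb releases, Settings enables pydantic's validate_assignment and
    # types `root_dir` as str, so assigning a PosixPath raises the error shown below.
    settings.root_dir = pathlib.Path("/tmp/unit-test-loggers")
except Exception as err:  # pydantic_core.ValidationError on affected versions
    print(f"wandb.Settings rejected the Path: {err}")
```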

## TESTING ON BROKEN MAIN BRANCH

```
self = Settings(allow_offline_artifacts=True, allow_val_change=False, anonymous=None, api_key=None, azure_account_url_to_acce...andb', sync_symlink_latest='/workspace/bionemo2/wandb/latest-run', timespec='', wandb_dir='/workspace/bionemo2/wandb/')
name = 'root_dir', value = PosixPath('/tmp/pytest-of-dorotat/pytest-20/test_nemo_logger_initilized0/unit-test-loggers')

    def __setattr__(self, name: str, value: Any) -> None:
        if name in self.__class_vars__:
            raise AttributeError(
                f'{name!r} is a ClassVar of `{self.__class__.__name__}` and cannot be set on an instance. '
                f'If you want to set a value on the class, use `{self.__class__.__name__}.{name} = value`.'
            )
        elif not _fields.is_valid_field_name(name):
            if self.__pydantic_private__ is None or name not in self.__private_attributes__:
                _object_setattr(self, name, value)
            else:
                attribute = self.__private_attributes__[name]
                if hasattr(attribute, '__set__'):
                    attribute.__set__(self, value)  # type: ignore
                else:
                    self.__pydantic_private__[name] = value
            return
        self._check_frozen(name, value)
        attr = getattr(self.__class__, name, None)
        if isinstance(attr, property):
            attr.__set__(self, value)
        elif self.model_config.get('validate_assignment', None):
>           self.__pydantic_validator__.validate_assignment(self, name, value)
E           pydantic_core._pydantic_core.ValidationError: 1 validation error for Settings
E           root_dir
E             Input should be a valid string [type=string_type, input_value=PosixPath('/tmp/pytest-of...zed0/unit-test-loggers'), input_type=PosixPath]
E               For further information visit https://errors.pydantic.dev/2.9/v/string_type

/usr/local/lib/python3.10/dist-packages/pydantic/main.py:881: ValidationError
```
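
The corresponding workaround is to hand wandb a plain `str` wherever the run directory is injected into the `WandbLogger`/`wandb.init` call chain. Below is a hypothetical sketch of the idea at the `wandb.init` level, not necessarily the exact hotfix shipped with this change; the path and project name are illustrative:

```python
import pathlib

import wandb

run_dir = pathlib.Path("/tmp/unit-test-loggers")  # illustrative directory
run_dir.mkdir(parents=True, exist_ok=True)

# Converting the Path to str before wandb builds its Settings avoids the
# string_type ValidationError above; offline mode keeps the sketch from
# talking to the wandb servers.
run = wandb.init(project="test_project", dir=str(run_dir), mode="offline")
run.finish()
```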

---------

Co-authored-by: Steven Kothen-Hill <148821680+skothenhill-nv@users.noreply.github.com>
---
 .../bionemo/llm/utils/test_logger_utils.py | 125 +++++++++++++++++-
 1 file changed, 124 insertions(+), 1 deletion(-)

diff --git a/sub-packages/bionemo-llm/tests/bionemo/llm/utils/test_logger_utils.py b/sub-packages/bionemo-llm/tests/bionemo/llm/utils/test_logger_utils.py
index 8d8c9d5b5c..179ac81b08 100644
--- a/sub-packages/bionemo-llm/tests/bionemo/llm/utils/test_logger_utils.py
+++ b/sub-packages/bionemo-llm/tests/bionemo/llm/utils/test_logger_utils.py
@@ -13,9 +13,132 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from bionemo.llm.utils.logger_utils import setup_nemo_lightning_logger
+import logging
+import pathlib
+
+import pytest
+from lightning.pytorch.loggers import TensorBoardLogger, WandbLogger
+from nemo import lightning as nl
+from nemo.lightning.nemo_logger import NeMoLogger
+
+from bionemo.llm.utils.logger_utils import WandbConfig, setup_nemo_lightning_logger
+
+
+@pytest.fixture
+def project_name() -> str:
+    return "test_project"
+
+
+@pytest.fixture
+def wandb_config(project_name):
+    return WandbConfig(
+        entity=None,
+        project=project_name,
+        tags=["tag1", "tag2"],
+        group="test_group",
+        job_type="test_job",
+        offline=True,  # ensure no actual communication with wandb servers
+        id=None,
+        anonymous=False,
+        log_model=False,
+    )
 
 
 def test_construct_logger_no_wandb():
     logger = setup_nemo_lightning_logger("test")
     assert logger.name == "test"
+
+
+def test_setup_logger_all_loggers(tmp_path, wandb_config, project_name, caplog):
+    # Use a unique experiment name
+    exp_name = "unit-test-loggers"
+    root_dir = tmp_path  # provided by pytest as a temporary directory
+
+    with caplog.at_level(logging.WARNING):
+        logger = setup_nemo_lightning_logger(
+            name=exp_name,
+            root_dir=root_dir,
+            initialize_tensorboard_logger=True,
+            wandb_config=wandb_config,
+            ckpt_callback=None,
+        )
+
+    # Checks on the returned logger
+    assert isinstance(logger, NeMoLogger), "The returned logger should be a NeMoLogger instance."
+    assert logger.name == exp_name
+
+    # Check that directories are set up correctly
+    expected_save_dir = root_dir / exp_name
+    assert logger.save_dir == expected_save_dir, "NeMoLogger save_dir should match expected path."
+    assert not expected_save_dir.exists(), "Expected experiment directory should not be created yet."
+
+    # Check TensorBoard logger initialization
+    tb_logger = logger.tensorboard
+    assert isinstance(tb_logger, TensorBoardLogger), "TensorBoardLogger should be created."
+    tb_log_dir = pathlib.Path(tb_logger.log_dir)
+    assert not tb_log_dir.is_dir(), "TensorBoard log directory should not exist yet."
+    assert tb_logger.name == exp_name, "TensorBoardLogger name should match experiment name."
+
+    # Check WandB logger initialization
+    wandb_logger = logger.wandb
+    assert isinstance(wandb_logger, WandbLogger), "WandBLogger should be created."
+    # Validate that wandb_logger uses correct save_dir and name
+    # WandbLogger's experiment is lazily created, so just check configured values
+    assert wandb_logger.name != exp_name, "WandBLogger name should not match experiment name."
+    assert wandb_logger.name == project_name, "WandBLogger name should match project name."
+    assert pathlib.Path(wandb_logger.save_dir) == expected_save_dir, "WandBLogger save_dir should match expected path."
+    # Since we provided wandb_config and tensorboard was enabled, we should NOT see
+    # the warnings about them being turned off.
+    assert "WandB is currently turned off." not in caplog.text
+    assert "User-set tensorboard is currently turned off." not in caplog.text
+
+
+def test_nemo_logger_initilized(tmp_path, wandb_config, project_name, caplog):
+    # Use a unique experiment name
+    exp_name = "unit-test-loggers"
+    root_dir = tmp_path  # provided by pytest as a temporary directory
+    trainer = nl.Trainer(devices=1, accelerator="gpu", num_nodes=1)
+
+    logger = setup_nemo_lightning_logger(
+        name=exp_name,
+        root_dir=root_dir,
+        initialize_tensorboard_logger=True,
+        wandb_config=wandb_config,
+        ckpt_callback=None,
+    )
+
+    # as in https://github.com/NVIDIA/NeMo/blob/bb895bc4b28ba99d707cb907c4496297a2a7b533/nemo/collections/llm/api.py#L852C22-L856C6
+    logger.setup(trainer=trainer)
+
+    # Check that directories are set up correctly
+    expected_save_dir = root_dir / exp_name
+    assert expected_save_dir.exists(), "Expected experiment directory should be created by logger setup."
+
+    # Check TensorBoard logger initialization
+    tb_logger = logger.tensorboard
+    tb_log_dir = pathlib.Path(tb_logger.log_dir)
+    assert not tb_log_dir.is_dir(), "TensorBoard log directory should not exist yet."
+
+    # Trigger lazy creation of experiment in loggers so loggers have their metadata available,
+    # following trainer setup at the start of training in
+    # https://github.com/Lightning-AI/pytorch-lightning/blob/de7c28ae865b5c9fd3ff21debebb994605f7f420/src/lightning/pytorch/trainer/trainer.py#L944
+    # which executes
+    # https://github.com/Lightning-AI/pytorch-lightning/blob/caa9e1e59436913e365bf52eeb2b07e3bf67efac/src/lightning/pytorch/trainer/call.py#L94C1-L97C34
+    for _logger in trainer.loggers:
+        if hasattr(_logger, "experiment"):
+            _ = _logger.experiment
+
+
+def test_setup_logger_wandb_experiment(tmp_path, wandb_config, project_name, caplog):
+    exp_name = "unit-test-loggers"
+    root_dir = tmp_path  # provided by pytest as a temporary directory
+
+    logger = setup_nemo_lightning_logger(
+        name=exp_name,
+        root_dir=root_dir,
+        initialize_tensorboard_logger=True,
+        wandb_config=wandb_config,
+        ckpt_callback=None,
+    )
+    wandb_logger = logger.wandb
+    _ = wandb_logger.experiment