Skip to content

Commit

Permalink
added NeMoLogger unit tests (#511)
Browse files Browse the repository at this point in the history
Adding hotfix and unit tests uncovering issue with WandbLogger in
https://nvidia.slack.com/archives/C074Z808N05/p1733418209959769

## DETAILS
An interesting observation: this genuinely appears to be a wandb bug,
since it can only be captured once it’s converted into a Lightning hook
during training. Only one out of the 4 new unit tests fails, when probably
all of them should fail.

The error is uncovered only after `NeMoLogger` setup, as in
https://github.com/NVIDIA/NeMo/blob/bb895bc4b28ba99d707cb907c4496297a2a7b533/nemo/collections/llm/api.py#L852C22-L856C6
and after `plt.Trainer` setup at the start of the training in

https://github.com/Lightning-AI/pytorch-lightning/blob/de7c28ae865b5c9fd3ff21debebb994605f7f420/src/lightning/pytorch/trainer/trainer.py#L944
which is executed in

https://github.com/Lightning-AI/pytorch-lightning/blob/caa9e1e59436913e365bf52eeb2b07e3bf67efac/src/lightning/pytorch/trainer/call.py#L94C1-L97C34

## TESTING ON BROKEN MAIN BRANCH


```
self = Settings(allow_offline_artifacts=True, allow_val_change=False, anonymous=None, api_key=None, azure_account_url_to_acce...andb', sync_symlink_latest='/workspace/bionemo2/wandb/latest-run', timespec='', wandb_dir='/workspace/bionemo2/wandb/')
name = 'root_dir', value = PosixPath('/tmp/pytest-of-dorotat/pytest-20/test_nemo_logger_initilized0/unit-test-loggers')

    def __setattr__(self, name: str, value: Any) -> None:
        if name in self.__class_vars__:
            raise AttributeError(
                f'{name!r} is a ClassVar of `{self.__class__.__name__}` and cannot be set on an instance. '
                f'If you want to set a value on the class, use `{self.__class__.__name__}.{name} = value`.'
            )
        elif not _fields.is_valid_field_name(name):
            if self.__pydantic_private__ is None or name not in self.__private_attributes__:
                _object_setattr(self, name, value)
            else:
                attribute = self.__private_attributes__[name]
                if hasattr(attribute, '__set__'):
                    attribute.__set__(self, value)  # type: ignore
                else:
                    self.__pydantic_private__[name] = value
            return
    
        self._check_frozen(name, value)
    
        attr = getattr(self.__class__, name, None)
        if isinstance(attr, property):
            attr.__set__(self, value)
        elif self.model_config.get('validate_assignment', None):
>           self.__pydantic_validator__.validate_assignment(self, name, value)
E           pydantic_core._pydantic_core.ValidationError: 1 validation error for Settings
E           root_dir
E             Input should be a valid string [type=string_type, input_value=PosixPath('/tmp/pytest-of...zed0/unit-test-loggers'), input_type=PosixPath]
E               For further information visit https://errors.pydantic.dev/2.9/v/string_type

/usr/local/lib/python3.10/dist-packages/pydantic/main.py:881: ValidationError
```
<img width="1362" alt="image"
src="https://github.com/user-attachments/assets/cb1f645d-4c1d-4baa-9e5e-474fbb04aa9b">
<img width="1154" alt="image"
src="https://github.com/user-attachments/assets/0cb54012-8d99-44d8-a556-9f6cbf5cd97b">

---------

Co-authored-by: Steven Kothen-Hill <[email protected]>
  • Loading branch information
dorotat-nv and skothenhill-nv authored Dec 12, 2024
1 parent bbc1447 commit bd4060d
Showing 1 changed file with 124 additions and 1 deletion.
125 changes: 124 additions & 1 deletion sub-packages/bionemo-llm/tests/bionemo/llm/utils/test_logger_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,132 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from bionemo.llm.utils.logger_utils import setup_nemo_lightning_logger
import logging
import pathlib

import pytest
from lightning.pytorch.loggers import TensorBoardLogger, WandbLogger
from nemo import lightning as nl
from nemo.lightning.nemo_logger import NeMoLogger

from bionemo.llm.utils.logger_utils import WandbConfig, setup_nemo_lightning_logger


@pytest.fixture
def project_name() -> str:
    """Return the wandb project name shared by the logger tests."""
    name = "test_project"
    return name


@pytest.fixture
def wandb_config(project_name):
    """Build a WandbConfig for tests.

    ``offline=True`` guarantees no actual communication with wandb servers.
    """
    settings = {
        "entity": None,
        "project": project_name,
        "tags": ["tag1", "tag2"],
        "group": "test_group",
        "job_type": "test_job",
        "offline": True,  # ensure no actual communication with wandb servers
        "id": None,
        "anonymous": False,
        "log_model": False,
    }
    return WandbConfig(**settings)


def test_construct_logger_no_wandb():
    """A logger built with only a name should carry that name through."""
    expected_name = "test"
    logger = setup_nemo_lightning_logger(expected_name)
    assert logger.name == expected_name


def test_setup_logger_all_loggers(tmp_path, wandb_config, project_name, caplog):
    """Setting up with both TensorBoard and WandB enabled should configure each
    sub-logger lazily (no directories created yet) and emit no turned-off warnings."""
    experiment = "unit-test-loggers"  # unique experiment name
    base_dir = tmp_path  # provided by pytest as a temporary directory

    with caplog.at_level(logging.WARNING):
        nemo_logger = setup_nemo_lightning_logger(
            name=experiment,
            root_dir=base_dir,
            initialize_tensorboard_logger=True,
            wandb_config=wandb_config,
            ckpt_callback=None,
        )

    # Returned object and its identity.
    assert isinstance(nemo_logger, NeMoLogger), "The returned logger should be a NeMoLogger instance."
    assert nemo_logger.name == experiment

    # Directory layout is configured but not yet materialized on disk.
    save_path = base_dir / experiment
    assert nemo_logger.save_dir == save_path, "NeMoLogger save_dir should match expected path."
    assert not save_path.exists(), "Expected experiment directory should not be created yet."

    # TensorBoard sub-logger.
    tensorboard = nemo_logger.tensorboard
    assert isinstance(tensorboard, TensorBoardLogger), "TensorBoardLogger should be created."
    assert not pathlib.Path(tensorboard.log_dir).is_dir(), "TensorBoard log directory should not exist yet."
    assert tensorboard.name == experiment, "TensorBoardLogger name should match experiment name."

    # WandB sub-logger: its experiment is lazily created, so only the configured
    # values are inspected here. Its name reflects the wandb *project*, not the
    # NeMo experiment name.
    wandb = nemo_logger.wandb
    assert isinstance(wandb, WandbLogger), "WandBLogger should be created."
    assert wandb.name != experiment, "WandBLogger name should not match experiment name."
    assert wandb.name == project_name, "WandBLogger name should match project name."
    assert pathlib.Path(wandb.save_dir) == save_path, "WandBLogger save_dir should match expected path."

    # Both loggers were explicitly enabled, so no "turned off" warnings expected.
    assert "WandB is currently turned off." not in caplog.text
    assert "User-set tensorboard is currently turned off." not in caplog.text


def test_nemo_logger_initilized(tmp_path, wandb_config, project_name, caplog):
    """After ``NeMoLogger.setup(trainer=...)`` the experiment directory exists,
    and lazily touching each logger's ``experiment`` reproduces the hook Lightning
    runs at training start — this is where the wandb/pydantic bug surfaces.

    NOTE(review): function name has a typo ("initilized"); kept to preserve the
    test's discovered identity.
    """
    # Use a unique experiment name
    exp_name = "unit-test-loggers"
    root_dir = tmp_path  # provided by pytest as a temporary directory
    trainer = nl.Trainer(devices=1, accelerator="gpu", num_nodes=1)

    logger = setup_nemo_lightning_logger(
        name=exp_name,
        root_dir=root_dir,
        initialize_tensorboard_logger=True,
        wandb_config=wandb_config,
        ckpt_callback=None,
    )

    # as in https://github.com/NVIDIA/NeMo/blob/bb895bc4b28ba99d707cb907c4496297a2a7b533/nemo/collections/llm/api.py#L852C22-L856C6
    logger.setup(trainer=trainer)

    # Check that directories are set up correctly. Fix: the original assertion
    # message said "should not be created yet", contradicting the asserted
    # condition — after setup() the directory IS expected to exist.
    expected_save_dir = root_dir / exp_name
    assert expected_save_dir.exists(), "Expected experiment directory should be created after logger setup."

    # Check TensorBoard logger initialization; its log dir is still lazy at this point.
    tb_logger = logger.tensorboard
    tb_log_dir = pathlib.Path(tb_logger.log_dir)
    assert not tb_log_dir.is_dir(), "TensorBoard log directory should not exist yet."

    # Trigger lazy creation of experiment in loggers so loggers have their metadata available
    # following trainer setup at the start of the training in
    # https://github.com/Lightning-AI/pytorch-lightning/blob/de7c28ae865b5c9fd3ff21debebb994605f7f420/src/lightning/pytorch/trainer/trainer.py#L944
    # which executes
    # https://github.com/Lightning-AI/pytorch-lightning/blob/caa9e1e59436913e365bf52eeb2b07e3bf67efac/src/lightning/pytorch/trainer/call.py#L94C1-L97C34
    for _logger in trainer.loggers:
        if hasattr(_logger, "experiment"):
            _ = _logger.experiment


def test_setup_logger_wandb_experiment(tmp_path, wandb_config, project_name, caplog):
    """Force the lazy creation of the WandB experiment right after logger setup."""
    kwargs = {
        "name": "unit-test-loggers",
        "root_dir": tmp_path,  # provided by pytest as a temporary directory
        "initialize_tensorboard_logger": True,
        "wandb_config": wandb_config,
        "ckpt_callback": None,
    }
    nemo_logger = setup_nemo_lightning_logger(**kwargs)
    # Accessing .experiment triggers wandb initialization.
    _ = nemo_logger.wandb.experiment

0 comments on commit bd4060d

Please sign in to comment.