From a3e9b18c4d892630d6b662deb6316ba174b036cf Mon Sep 17 00:00:00 2001
From: Sebastian Walter
Date: Wed, 7 Aug 2024 15:38:23 +0200
Subject: [PATCH] setup logging

---
 python/text_utils/api/trainer.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/python/text_utils/api/trainer.py b/python/text_utils/api/trainer.py
index 044d4fa..684eb8c 100644
--- a/python/text_utils/api/trainer.py
+++ b/python/text_utils/api/trainer.py
@@ -851,6 +851,7 @@ def _train_local_distributed(
         directories: dict[str, str],
         profile: str | None = None
     ):
+        logging.setup_logging()
         os.environ["MASTER_ADDR"] = "localhost"
         os.environ["MASTER_PORT"] = str(port)
 
@@ -890,6 +891,7 @@ def train_slurm(cls, work_dir: str, experiment_dir: str, config_path: str):
         assert torch.cuda.device_count() > 0, "need at least one GPU for training, but found none"
         assert dist.is_available(), "distributed package must be available for training"
         assert dist.is_nccl_available(), "nccl backend for distributed training must be available"
+        logging.setup_logging()
         logger = logging.get_logger("SLURM_INITIALIZATION")
         num_gpus = torch.cuda.device_count()
         logger.info(f"Found {num_gpus} GPU{'s' * (num_gpus > 1)} "
@@ -967,6 +969,7 @@ def train_slurm(cls, work_dir: str, experiment_dir: str, config_path: str):
     @classmethod
     def train_local(cls, work_dir: str, experiment_dir: str, config_path: str,
                     profile: str | None = None):
+        logging.setup_logging()
         logger = logging.get_logger("LOCAL_INITIALIZATION")
         num_gpus = torch.cuda.device_count()
         assert num_gpus > 0, "need at least one GPU for local training"
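
Note on the change: setup_logging() is added at each training entry point (train_local, train_slurm, and the spawned _train_local_distributed workers), presumably because logging configuration done in the launching process is not carried over to processes started with the "spawn" method, so every rank has to configure its own handlers. The sketch below illustrates that behavior with a hypothetical setup_logging() built on the standard library; it is not the text_utils implementation, just a minimal stand-in under that assumption.

    import logging

    import torch.multiprocessing as mp


    def setup_logging() -> None:
        # Hypothetical stand-in for text_utils' logging.setup_logging():
        # attach a stream handler with a uniform format to the root logger.
        logging.basicConfig(
            level=logging.INFO,
            format="[%(asctime)s] {%(name)s} %(levelname)s: %(message)s",
        )


    def worker(rank: int) -> None:
        # Workers started via "spawn" re-import the module and do not inherit
        # the parent's logging configuration, hence the per-process call.
        setup_logging()
        logging.getLogger(f"RANK_{rank}").info("logging configured in rank %d", rank)


    if __name__ == "__main__":
        setup_logging()             # launching process (mirrors train_local / train_slurm)
        mp.spawn(worker, nprocs=2)  # spawned workers (mirror _train_local_distributed)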