Commit a3e9b18: setup logging
bastiscode committed Aug 7, 2024
1 parent: 721b63c
Showing 1 changed file with 3 additions and 0 deletions.
python/text_utils/api/trainer.py: 3 additions & 0 deletions
@@ -851,6 +851,7 @@ def _train_local_distributed(
         directories: dict[str, str],
         profile: str | None = None
     ):
+        logging.setup_logging()
         os.environ["MASTER_ADDR"] = "localhost"
         os.environ["MASTER_PORT"] = str(port)

@@ -890,6 +891,7 @@ def train_slurm(cls, work_dir: str, experiment_dir: str, config_path: str):
         assert torch.cuda.device_count() > 0, "need at least one GPU for training, but found none"
         assert dist.is_available(), "distributed package must be available for training"
         assert dist.is_nccl_available(), "nccl backend for distributed training must be available"
+        logging.setup_logging()
         logger = logging.get_logger("SLURM_INITIALIZATION")
         num_gpus = torch.cuda.device_count()
         logger.info(f"Found {num_gpus} GPU{'s' * (num_gpus > 1)} "
@@ -967,6 +969,7 @@ def train_slurm(cls, work_dir: str, experiment_dir: str, config_path: str):

     @classmethod
     def train_local(cls, work_dir: str, experiment_dir: str, config_path: str, profile: str | None = None):
+        logging.setup_logging()
         logger = logging.get_logger("LOCAL_INITIALIZATION")
         num_gpus = torch.cuda.device_count()
         assert num_gpus > 0, "need at least one GPU for local training"
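
Note: the `logging` name used here appears to be the repository's own logging helper rather than the standard-library `logging` package, since the standard library has neither `setup_logging` nor `get_logger`; its implementation is not part of this diff. The pattern the commit establishes is the same in all three entry points (`_train_local_distributed`, `train_slurm`, `train_local`): configure logging once at the start of the process, then request named loggers. Below is a minimal sketch of what such a helper pair could look like, assuming a thin wrapper around the standard library. Only the function names mirror the calls in the diff; the format string, level, and handler choice are illustrative, not the repository's actual code.

    # sketch.py: hypothetical stand-in for the repository's logging helpers.
    # Only the function names mirror the diff; everything else is assumed.
    import logging
    import sys


    def setup_logging(level: int = logging.INFO) -> None:
        # Configure the root logger once so that every logger created
        # afterwards inherits the same handler, level, and format.
        logging.basicConfig(
            level=level,
            format="[%(asctime)s] {%(name)s} %(levelname)s: %(message)s",
            stream=sys.stderr,
            force=True,  # replace handlers other libraries may have installed
        )


    def get_logger(name: str) -> logging.Logger:
        # Return a named logger that uses the root configuration above.
        return logging.getLogger(name)


    if __name__ == "__main__":
        # Mirrors the order used in the commit: set up logging first,
        # then create a logger for the initialization phase.
        setup_logging()
        logger = get_logger("LOCAL_INITIALIZATION")
        logger.info("logging configured")

Calling setup_logging() inside each entry point, rather than once at import time, is presumably the motivation here: worker processes started via spawn for local distributed training, or launched separately under SLURM, do not inherit the parent's logging configuration, so each process has to configure it before creating its logger.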
