From c7435a83a67ff588636d9e821b18ce3f36bcf165 Mon Sep 17 00:00:00 2001
From: Chun Cai
Date: Mon, 6 Jan 2025 16:56:57 +0800
Subject: [PATCH] fix: lower `num_workers` to 4

For multi-task training in PyTorch, each data source has its own
dataloader. If each dataloader uses a large number of workers, the
worker processes multiply across data sources and stress the CPU.

Signed-off-by: Chun Cai
---
 deepmd/pt/utils/env.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/deepmd/pt/utils/env.py b/deepmd/pt/utils/env.py
index 9803f8d04d..0e1322a640 100644
--- a/deepmd/pt/utils/env.py
+++ b/deepmd/pt/utils/env.py
@@ -21,7 +21,7 @@
     ncpus = len(os.sched_getaffinity(0))
 except AttributeError:
     ncpus = os.cpu_count()
-NUM_WORKERS = int(os.environ.get("NUM_WORKERS", min(8, ncpus)))
+NUM_WORKERS = int(os.environ.get("NUM_WORKERS", min(4, ncpus)))
 # Make sure DDP uses correct device if applicable
 LOCAL_RANK = os.environ.get("LOCAL_RANK")
 LOCAL_RANK = int(0 if LOCAL_RANK is None else LOCAL_RANK)
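
For illustration, here is a minimal sketch (not part of the patch) of why
per-dataloader worker counts multiply in multi-task training. The task names
and toy datasets below are hypothetical; only the NUM_WORKERS logic mirrors
the patched deepmd/pt/utils/env.py.

    import os

    import torch
    from torch.utils.data import DataLoader, TensorDataset

    # Same CPU-count fallback as in deepmd/pt/utils/env.py.
    try:
        ncpus = len(os.sched_getaffinity(0))
    except AttributeError:
        ncpus = os.cpu_count()

    # Mirrors the patched default: cap per-dataloader workers at 4.
    NUM_WORKERS = int(os.environ.get("NUM_WORKERS", min(4, ncpus)))

    # In multi-task training, each data source gets its own DataLoader,
    # so the number of worker processes scales with the number of tasks.
    # These task names and datasets are placeholders for illustration.
    tasks = {name: TensorDataset(torch.randn(64, 3)) for name in ("water", "metal", "alloy")}
    loaders = {
        name: DataLoader(ds, batch_size=8, num_workers=NUM_WORKERS)
        for name, ds in tasks.items()
    }

    total_workers = len(loaders) * NUM_WORKERS
    print(f"{len(loaders)} dataloaders x {NUM_WORKERS} workers = {total_workers} processes")
    # With the previous default of min(8, ncpus), the same three tasks
    # could spawn up to 24 worker processes; the new default caps it at 12.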