From 04f3be1a6739170d5ce575342e8b35f6cc4f8e95 Mon Sep 17 00:00:00 2001 From: Sebastian Walter Date: Thu, 27 Jun 2024 07:28:40 +0200 Subject: [PATCH] offload fsdp state dict to cpu and rank 0 only --- python/text_utils/api/trainer.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/python/text_utils/api/trainer.py b/python/text_utils/api/trainer.py index fb75fcf..3e293c3 100644 --- a/python/text_utils/api/trainer.py +++ b/python/text_utils/api/trainer.py @@ -221,17 +221,16 @@ def __init__( # later, because FSDP handles mixed precision itself self.mixed_precision = None - offload_state_dict = self.info.world_size > 1 FSDP.set_state_dict_type( self.model, StateDictType.FULL_STATE_DICT, FullStateDictConfig( - offload_to_cpu=offload_state_dict, - rank0_only=offload_state_dict + offload_to_cpu=True, + rank0_only=True ), FullOptimStateDictConfig( - offload_to_cpu=offload_state_dict, - rank0_only=offload_state_dict + offload_to_cpu=True, + rank0_only=True ) )