fix: dpo trainer ds config (#957)

* fix: dpo trainer ds config: ref_model and model shouldn't share the same ds config, so we shouldn't modify the ds config directly; otherwise, it will cause something to go wrong when initializing the DeepSpeed engine. * fix: import sort: sort imports with isort.
huggingface · Nov 6, 2023 · 8e9cae8 · 8e9cae8
1 parent 654543a
commit 8e9cae8
Showing 1 changed file with 3 additions and 1 deletion.
diff --git a/trl/trainer/dpo_trainer.py b/trl/trainer/dpo_trainer.py
@@ -16,6 +16,7 @@
 import random
 import warnings
 from collections import defaultdict
+from copy import deepcopy
 from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union
 
 import torch
@@ -345,7 +346,8 @@ def make_inputs_require_grad(module, input, output):
     def _prepare_deepspeed(self, model: PreTrainedModelWrapper):
         # Adapted from accelerate: https://github.com/huggingface/accelerate/blob/739b135f8367becb67ffaada12fe76e3aa60fefd/src/accelerate/accelerator.py#L1473
         deepspeed_plugin = self.accelerator.state.deepspeed_plugin
-        config_kwargs = deepspeed_plugin.deepspeed_config
+        config_kwargs = deepcopy(deepspeed_plugin.deepspeed_config)
+
         if model is not None:
             if hasattr(model, "config"):
                 hidden_size = (