From 7827003a494b75c8a5eda192c8db0bad930dc4ca Mon Sep 17 00:00:00 2001 From: Gaetan LOPEZ Date: Mon, 4 Sep 2023 17:05:32 -0400 Subject: [PATCH 01/38] initial skeleton --- tests/test_iterative_trainer.py | 77 +++++++++++ trl/trainer/iterative_trainer.py | 217 +++++++++++++++++++++++++++++++ 2 files changed, 294 insertions(+) create mode 100644 tests/test_iterative_trainer.py create mode 100644 trl/trainer/iterative_trainer.py diff --git a/tests/test_iterative_trainer.py b/tests/test_iterative_trainer.py new file mode 100644 index 0000000000..6939b2f5d9 --- /dev/null +++ b/tests/test_iterative_trainer.py @@ -0,0 +1,77 @@ +# Copyright 2023 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import tempfile +import unittest + +import torch +from datasets import Dataset +from pytest import mark +from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments + +from trl import IterativeTrainer + +from .testing_utils import require_peft + + +class DPOTrainerTester(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.model_id = "trl-internal-testing/dummy-GPT2-correct-vocab" + cls.model = AutoModelForCausalLM.from_pretrained(cls.model_id) + cls.tokenizer = AutoTokenizer.from_pretrained(cls.model_id) + cls.tokenizer.pad_token = cls.tokenizer.eos_token + + def _init_dummy_dataset(self): + dummy_dataset_dict = { + "prompt": [ + "hi nice", + "I am", + "My name is", + "Python" + ] + } + + return Dataset.from_dict(dummy_dataset_dict) + + def test_iterative_trainer(self): + with tempfile.TemporaryDirectory() as tmp_dir: + training_args = TrainingArguments( + output_dir=tmp_dir, + per_device_train_batch_size=2, + max_steps=3, + remove_unused_columns=False, + gradient_accumulation_steps=4, + learning_rate=9e-1, + evaluation_strategy="steps", + ) + + dummy_dataset = self._init_dummy_dataset() + + trainer = IterativeTrainer( + model=self.model, + tokenizer=self.tokenizer, + ) + + previous_trainable_params = {n: param.clone() for n, param in trainer.model.named_parameters()} + + trainer.train() + + self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"]) + + # check the params have changed + for n, param in previous_trainable_params.items(): + new_param = trainer.model.get_parameter(n) + # check the params have changed - ignore 0 biases + if param.sum() != 0: + self.assertFalse(torch.equal(param, new_param)) \ No newline at end of file diff --git a/trl/trainer/iterative_trainer.py b/trl/trainer/iterative_trainer.py new file mode 100644 index 0000000000..7f1ca1c12e --- /dev/null +++ b/trl/trainer/iterative_trainer.py @@ -0,0 +1,217 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Callable, List, Optional + +import torch +from accelerate import Accelerator +from accelerate.utils import ProjectConfiguration +from datasets import Dataset +from torch.optim import Adam +from torch.utils.data import DataLoader +from transformers import ( + PreTrainedModel, + PreTrainedTokenizer, + PreTrainedTokenizerBase, + PreTrainedTokenizerFast, + Trainer, +) + +from ..core import PPODecorators, set_seed +from . import PPOConfig, RunningMoments + + +class IterativeTrainer(Trainer): + """ + TO DO + """ + + def __init__( + self, + config: PPOConfig = None, + model: PreTrainedModel = None, + tokenizer: PreTrainedTokenizerBase = None, + optimizer: Optional[torch.optim.Optimizer] = None, + data_collator: Optional[Callable] = None, + max_length: Optional[int] = None, + ): + """ + Initialize IterativeTrainer. + + Args: + config (`PPOConfig`): + Configuration object for PPOTrainer. Check the documentation of `PPOConfig` for more details. + model (`PreTrainedModel`): + Hugging Face transformer model. + tokenizer (`transformers.PreTrainedTokenizerBase`): + Hugging Face tokenizer + optimizer (Optional[`torch.optim.Optimizer`]): + Optimizer used for training. If `None`, the `Adam` is used as default. + data_collator (Optional[function]): + Data collator function. + """ + + super().__init__(config) + + # initial seed for reproducible experiments + set_seed(config.seed) + + # Step 0: check positional arguments validity + if not isinstance(config, PPOConfig): + raise ValueError(f"config must be a PPOConfig, got {type(config)}") + if not isinstance(tokenizer, (PreTrainedTokenizerBase)): + raise ValueError( + f"tokenizer must be a PreTrainedTokenizerBase like a PreTrainedTokenizer or a PreTrainedTokenizerFast, got {type(tokenizer)}" + ) + + # Step 1: Initialize Accelerator + self.accelerator = Accelerator( + log_with=config.log_with, + gradient_accumulation_steps=config.gradient_accumulation_steps, + project_config=ProjectConfiguration(**config.project_kwargs), + **config.accelerator_kwargs, + ) + + is_using_tensorboard = config.log_with is not None and config.log_with == "tensorboard" + + self.accelerator.init_trackers( + config.tracker_project_name, + config=dict(trl_ppo_trainer_config=config.to_dict()) if not is_using_tensorboard else config.to_dict(), + init_kwargs=config.tracker_kwargs, + ) + + self.model = model + self.model_params = filter(lambda p: p.requires_grad, self.model.parameters()) + self.is_encoder_decoder = hasattr(self.model, "is_encoder_decoder") + self.is_peft_model = getattr(self.model, "is_peft_model", False) + + if not (isinstance(tokenizer, PreTrainedTokenizer) or isinstance(tokenizer, PreTrainedTokenizerFast)): + raise ValueError( + "tokenizer must be a transformers.PreTrainedTokenizer or transformers.PreTrainedTokenizerFast" + ) + self.tokenizer = tokenizer + + self.data_collator = data_collator + + if optimizer is None: + self.optimizer = Adam( + filter(lambda p: p.requires_grad, self.model.parameters()), + lr=self.config.learning_rate, + ) + else: + self.optimizer = optimizer + + (self.model, self.optimizer, self.data_collator,) = 
self.accelerator.prepare( + self.model, + self.optimizer, + self.data_collator, + ) + + self.is_distributed = self.accelerator.distributed_type == "MULTI_GPU" + + # post process for PP + if not getattr(self.model, "is_sequential_parallel", False): + self.current_device = self.accelerator.device + else: + self.current_device = torch.device("cuda:0") + + PPODecorators.optimize_cuda_cache = self.config.optimize_cuda_cache + + self.running = RunningMoments(self.accelerator) + + def prepare_model_inputs(self, input_ids: torch.Tensor, attention_mask: torch.Tensor, labels: torch.Tensor): + input_data = self.data_collator( + [{"input_ids": ids, "attention_mask": torch.ones_like(ids)} for ids in labels] + ).to(self.model.device) + + input_data.pop("decoder_input_ids", None) # This is directly computed inside the model + + return input_data + + def compute_loss(self, model, inputs, return_outputs=False): + """ + Loss is computed as in the HuggingFace Trainer. + + Subclass and override for custom behavior. + """ + + outputs = model(**inputs) + + if isinstance(outputs, dict) and "loss" not in outputs: + raise ValueError( + "The model did not return a loss from the inputs, only the following keys: " + f"{','.join(outputs.keys())}. For reference, the inputs it received are {','.join(inputs.keys())}." + ) + # We don't use .loss here since the model may return tuples instead of ModelOutput. + loss = outputs["loss"] if isinstance(outputs, dict) else outputs[0] + + return (loss, outputs) if return_outputs else loss + + @PPODecorators.empty_cuda_cache() + def step( + self, input_ids: List[torch.LongTensor], attention_mask: List[torch.LongTensor], labels: List[torch.LongTensor] + ): + """ + Run an optimisation step given a list of input_ids, attention_mask, and labels. 
+ Args: + input_ids (List[`torch.LongTensor`]): + List of tensors containing the input_ids + attention_mask (List[`torch.LongTensor`]): + List of tensors containing the attenton_mask + labels (List[`torch.FloatTensor`]): + List of tensors containing the labels (if set to None, will default to input_ids) + Returns: + `dict[str, Any]`: A summary of the training statistics + """ + + self.model.train() + model_inputs = self.prepare_model_inputs(input_ids, attention_mask, labels) + + model_inputs_names = list(model_inputs.keys()) + + batch_dict = {} + batch_dict.update(model_inputs) + + def collator(data): + return_dict = dict() + for key in data[0]: + if key in ["input_ids", "attention_mask", "labels"]: + return_dict[key] = torch.stack([d[key] for d in data]).to(self.model.device) + return return_dict + + batch_data = Dataset.from_dict(batch_dict) + batch_data.set_format("torch") + + step_dataloader = DataLoader( + batch_data, + batch_size=self.args.step_batch_size, + shuffle=True, + collate_fn=collator, + ) + + all_stats = [] + + for _, batch in enumerate(step_dataloader): + with self.accelerator.accumulate(self.model): + model_inputs = {k: batch[k] for k in model_inputs_names} + loss = self.model(**model_inputs) + + self.accelerator.backward(loss) + + self.optimizer.step() + self.optimizer.zero_grad() + + # update stats etc + all_stats.append(dict(loss=dict(total=loss.detach()))) + + return all_stats From 0442b6bfa52108569e7ae0d56b51cc11e76a768e Mon Sep 17 00:00:00 2001 From: Gaetan LOPEZ Date: Mon, 4 Sep 2023 18:37:18 -0400 Subject: [PATCH 02/38] iterative trainer for decoder only --- trl/trainer/iterative_config.py | 78 ++++++++++++++++++++++++++++++++ trl/trainer/iterative_trainer.py | 42 +++++++++-------- 2 files changed, 102 insertions(+), 18 deletions(-) create mode 100644 trl/trainer/iterative_config.py diff --git a/trl/trainer/iterative_config.py b/trl/trainer/iterative_config.py new file mode 100644 index 0000000000..0dba8a1286 --- /dev/null +++ b/trl/trainer/iterative_config.py @@ -0,0 +1,78 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from dataclasses import dataclass, field +from typing import Optional + +from ..core import flatten_dict + + +@dataclass +class IterativeConfig(object): + """ + Configuration class for PPOTrainer + """ + + task_name: Optional[str] = field( + default=None, + metadata={"help": "Name of task to use - used only for tracking purposes"}, + ) + model_name: Optional[str] = field( + default=None, + metadata={"help": "Name of model to use - used only for tracking purposes"}, + ) + step_batch_size: Optional[int] = field( + default=256, metadata={"help": "Number of samples per optimisation step inside the step function"} + ) + gradient_accumulation_steps: Optional[int] = field( + default=1, metadata={"help": "The number of gradient accumulation steps"} + ) + remove_unused_columns: Optional[bool] = field( + default=True, + metadata={"help": "Remove unused columns from the dataset if `datasets.Dataset` is used"}, + ) + log_with: Optional[str] = field( + default=None, + metadata={ + "help": "Log with either 'wandb' or 'tensorboard', check https://huggingface.co/docs/accelerate/usage_guides/tracking for more details" + }, + ) + tracker_kwargs: Optional[dict] = field( + default_factory=dict, + metadata={"help": "Keyword arguments for the tracker (e.g. wandb_project)"}, + ) + accelerator_kwargs: Optional[dict] = field( + default_factory=dict, + metadata={"help": "Keyword arguments for the accelerator"}, + ) + project_kwargs: Optional[dict] = field( + default_factory=dict, + metadata={"help": "Keyword arguments for the accelerator project config (e.g. `logging_dir`)"}, + ) + tracker_project_name: Optional[str] = field( + default="trl", metadata={"help": "Name of project to use for tracking"} + ) + max_grad_norm: Optional[float] = field( + default=None, metadata={"help": "Maximum gradient norm for gradient clipping"} + ) + seed: Optional[int] = field(default=0, metadata={"help": "Seed value for random generations"}) + optimize_cuda_cache: Optional[bool] = field( + default=False, + metadata={"help": "Optimize CUDA cache for slightly more memory-efficient training"}, + ) + + def to_dict(self): + output_dict = {} + for key, value in self.__dict__.items(): + output_dict[key] = value + return flatten_dict(output_dict) diff --git a/trl/trainer/iterative_trainer.py b/trl/trainer/iterative_trainer.py index 7f1ca1c12e..ede5748bc3 100644 --- a/trl/trainer/iterative_trainer.py +++ b/trl/trainer/iterative_trainer.py @@ -19,38 +19,44 @@ from datasets import Dataset from torch.optim import Adam from torch.utils.data import DataLoader -from transformers import ( - PreTrainedModel, - PreTrainedTokenizer, - PreTrainedTokenizerBase, - PreTrainedTokenizerFast, - Trainer, -) +from transformers import PreTrainedModel, PreTrainedTokenizer, PreTrainedTokenizerBase, PreTrainedTokenizerFast from ..core import PPODecorators, set_seed -from . import PPOConfig, RunningMoments +from . import IterativeConfig, RunningMoments -class IterativeTrainer(Trainer): +class IterativeTrainer: """ - TO DO + The IterativeTrainer can be used to finetune models with methods that requires some steps between optimization. + + Attributes: + **config** (`IterativeConfig`) -- Configuration object for IterativeTrainer. + **model** (`PreTrainedModel`) -- Model to be optimized, Hugging Face transformer model with a causal language modeling head. + Check the documentation of `PreTrainedModelWrapper` for more details. + **tokenizer** (`PreTrainedTokenizerBase`) -- Tokenizer to be used for encoding the + data. 
Check the documentation of `transformers.PreTrainedTokenizer` and + `transformers.PreTrainedTokenizerFast` for more details. + **optimizer** (`torch.optim.Optimizer`, *optional*) -- Optimizer to be used for training. If no optimizer is + provided, the trainer will create an Adam optimizer with the learning rate specified in the configuration + object. + **data_collator** (DataCollatorForLanguageModeling, *optional*) -- Data collator to be used for training and + passed along the dataloader. """ def __init__( self, - config: PPOConfig = None, + config: IterativeConfig = None, model: PreTrainedModel = None, tokenizer: PreTrainedTokenizerBase = None, optimizer: Optional[torch.optim.Optimizer] = None, data_collator: Optional[Callable] = None, - max_length: Optional[int] = None, ): """ Initialize IterativeTrainer. Args: - config (`PPOConfig`): - Configuration object for PPOTrainer. Check the documentation of `PPOConfig` for more details. + config (`IterativeConfig`): + Configuration object for IterativeTrainer. model (`PreTrainedModel`): Hugging Face transformer model. tokenizer (`transformers.PreTrainedTokenizerBase`): @@ -67,8 +73,8 @@ def __init__( set_seed(config.seed) # Step 0: check positional arguments validity - if not isinstance(config, PPOConfig): - raise ValueError(f"config must be a PPOConfig, got {type(config)}") + if not isinstance(config, IterativeConfig): + raise ValueError(f"config must be a IterativeConfig, got {type(config)}") if not isinstance(tokenizer, (PreTrainedTokenizerBase)): raise ValueError( f"tokenizer must be a PreTrainedTokenizerBase like a PreTrainedTokenizer or a PreTrainedTokenizerFast, got {type(tokenizer)}" @@ -194,7 +200,7 @@ def collator(data): step_dataloader = DataLoader( batch_data, - batch_size=self.args.step_batch_size, + batch_size=self.config.step_batch_size, shuffle=True, collate_fn=collator, ) @@ -204,7 +210,7 @@ def collator(data): for _, batch in enumerate(step_dataloader): with self.accelerator.accumulate(self.model): model_inputs = {k: batch[k] for k in model_inputs_names} - loss = self.model(**model_inputs) + loss = self.compute_loss(self.model, model_inputs) self.accelerator.backward(loss) From 4255b0b221b28afb4396b06586fd9f13987a6d6e Mon Sep 17 00:00:00 2001 From: Gaetan LOPEZ Date: Mon, 4 Sep 2023 18:39:46 -0400 Subject: [PATCH 03/38] iterative trainer unittest --- tests/test_iterative_trainer.py | 57 ++++++++++----------------------- 1 file changed, 17 insertions(+), 40 deletions(-) diff --git a/tests/test_iterative_trainer.py b/tests/test_iterative_trainer.py index 6939b2f5d9..b4d0a00f32 100644 --- a/tests/test_iterative_trainer.py +++ b/tests/test_iterative_trainer.py @@ -11,20 +11,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-import tempfile import unittest import torch from datasets import Dataset -from pytest import mark -from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments +from transformers import AutoModelForCausalLM, AutoTokenizer -from trl import IterativeTrainer +from trl import IterativeConfig, IterativeTrainer -from .testing_utils import require_peft - -class DPOTrainerTester(unittest.TestCase): +class IterativeTrainerTester(unittest.TestCase): @classmethod def setUpClass(cls): cls.model_id = "trl-internal-testing/dummy-GPT2-correct-vocab" @@ -34,44 +30,25 @@ def setUpClass(cls): def _init_dummy_dataset(self): dummy_dataset_dict = { - "prompt": [ - "hi nice", - "I am", - "My name is", - "Python" - ] + "input_ids": [torch.tensor([5303, 3621]), torch.tensor([3666, 1438, 318]), torch.tensor([5303, 3621])], + "attention_mask": [torch.tensor([1, 1]), torch.tensor([1, 1, 1]), torch.tensor([1, 1])], } return Dataset.from_dict(dummy_dataset_dict) - def test_iterative_trainer(self): - with tempfile.TemporaryDirectory() as tmp_dir: - training_args = TrainingArguments( - output_dir=tmp_dir, - per_device_train_batch_size=2, - max_steps=3, - remove_unused_columns=False, - gradient_accumulation_steps=4, - learning_rate=9e-1, - evaluation_strategy="steps", - ) - - dummy_dataset = self._init_dummy_dataset() - - trainer = IterativeTrainer( - model=self.model, - tokenizer=self.tokenizer, - ) + def setUp(self): + # initialize trainer + self.iterative_config = IterativeConfig(step_batch_size=2, log_with=None) + self.model.train() + return super().setUp() - previous_trainable_params = {n: param.clone() for n, param in trainer.model.named_parameters()} + def test_iterative_step(self): + # initialize dataset + dummy_dataset = self._init_dummy_dataset() - trainer.train() + iterative_trainer = IterativeTrainer(config=self.iterative_config, model=self.model, tokenizer=self.tokenizer) - self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"]) + iterative_trainer.step(dummy_dataset["input_ids"], dummy_dataset["attention_mask"], dummy_dataset["labels"]) - # check the params have changed - for n, param in previous_trainable_params.items(): - new_param = trainer.model.get_parameter(n) - # check the params have changed - ignore 0 biases - if param.sum() != 0: - self.assertFalse(torch.equal(param, new_param)) \ No newline at end of file + for param in iterative_trainer.model.parameters(): + assert param.grad is not None From 5648602b051ef058414ff547db60662000491cf2 Mon Sep 17 00:00:00 2001 From: Gaetan LOPEZ Date: Mon, 4 Sep 2023 18:52:56 -0400 Subject: [PATCH 04/38] encoder_decoder support --- trl/trainer/iterative_config.py | 1 + trl/trainer/iterative_trainer.py | 22 ++++++++++++++++------ 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/trl/trainer/iterative_config.py b/trl/trainer/iterative_config.py index 0dba8a1286..94eef16e7c 100644 --- a/trl/trainer/iterative_config.py +++ b/trl/trainer/iterative_config.py @@ -34,6 +34,7 @@ class IterativeConfig(object): step_batch_size: Optional[int] = field( default=256, metadata={"help": "Number of samples per optimisation step inside the step function"} ) + learning_rate: Optional[float] = field(default=1e-5, metadata={"help": "Adam learning rate"}) gradient_accumulation_steps: Optional[int] = field( default=1, metadata={"help": "The number of gradient accumulation steps"} ) diff --git a/trl/trainer/iterative_trainer.py b/trl/trainer/iterative_trainer.py index ede5748bc3..f825754ba4 100644 --- a/trl/trainer/iterative_trainer.py +++ 
b/trl/trainer/iterative_trainer.py @@ -27,7 +27,7 @@ class IterativeTrainer: """ - The IterativeTrainer can be used to finetune models with methods that requires some steps between optimization. + The IterativeTrainer can be used to finetune models with methods that require some steps between optimization. Attributes: **config** (`IterativeConfig`) -- Configuration object for IterativeTrainer. @@ -134,15 +134,25 @@ def __init__( PPODecorators.optimize_cuda_cache = self.config.optimize_cuda_cache self.running = RunningMoments(self.accelerator) + self.is_encoder_decoder = hasattr(self.model, "is_encoder_decoder") def prepare_model_inputs(self, input_ids: torch.Tensor, attention_mask: torch.Tensor, labels: torch.Tensor): - input_data = self.data_collator( - [{"input_ids": ids, "attention_mask": torch.ones_like(ids)} for ids in labels] - ).to(self.model.device) + if self.is_encoder_decoder: + input_data = self.data_collator( + [ + {"input_ids": i, "attention_mask": a, "labels": l} + for i, a, l in zip(input_ids, attention_mask, labels) + ] + ).to(self.model.device) + + else: + input_data = self.data_collator( + [{"input_ids": ids, "attention_mask": torch.ones_like(ids)} for ids in labels] + ).to(self.model.device) - input_data.pop("decoder_input_ids", None) # This is directly computed inside the model + input_data.pop("decoder_input_ids", None) # This is directly computed inside the model - return input_data + return input_data def compute_loss(self, model, inputs, return_outputs=False): """ From faaae0a94c4bf49c3fe08cf459c636a465e6e588 Mon Sep 17 00:00:00 2001 From: Gaetan LOPEZ Date: Mon, 4 Sep 2023 19:17:29 -0400 Subject: [PATCH 05/38] fix typo in unittest --- tests/test_iterative_trainer.py | 12 +++++++++--- trl/trainer/iterative_trainer.py | 10 +++++----- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/tests/test_iterative_trainer.py b/tests/test_iterative_trainer.py index b4d0a00f32..85be6ed248 100644 --- a/tests/test_iterative_trainer.py +++ b/tests/test_iterative_trainer.py @@ -15,7 +15,7 @@ import torch from datasets import Dataset -from transformers import AutoModelForCausalLM, AutoTokenizer +from transformers import AutoModelForCausalLM, AutoTokenizer, DataCollatorForLanguageModeling from trl import IterativeConfig, IterativeTrainer @@ -32,21 +32,27 @@ def _init_dummy_dataset(self): dummy_dataset_dict = { "input_ids": [torch.tensor([5303, 3621]), torch.tensor([3666, 1438, 318]), torch.tensor([5303, 3621])], "attention_mask": [torch.tensor([1, 1]), torch.tensor([1, 1, 1]), torch.tensor([1, 1])], + "labels": [torch.tensor([5303, 3621]), torch.tensor([3666, 1438, 318]), torch.tensor([5303, 3621])], } - return Dataset.from_dict(dummy_dataset_dict) + dummy_dataset = Dataset.from_dict(dummy_dataset_dict) + dummy_dataset.set_format("torch") + return dummy_dataset def setUp(self): # initialize trainer self.iterative_config = IterativeConfig(step_batch_size=2, log_with=None) self.model.train() + self.data_collator = DataCollatorForLanguageModeling(self.tokenizer, mlm=False) return super().setUp() def test_iterative_step(self): # initialize dataset dummy_dataset = self._init_dummy_dataset() - iterative_trainer = IterativeTrainer(config=self.iterative_config, model=self.model, tokenizer=self.tokenizer) + iterative_trainer = IterativeTrainer( + config=self.iterative_config, model=self.model, tokenizer=self.tokenizer, data_collator=self.data_collator + ) iterative_trainer.step(dummy_dataset["input_ids"], dummy_dataset["attention_mask"], dummy_dataset["labels"]) diff --git 
a/trl/trainer/iterative_trainer.py b/trl/trainer/iterative_trainer.py index f825754ba4..d9f1764f17 100644 --- a/trl/trainer/iterative_trainer.py +++ b/trl/trainer/iterative_trainer.py @@ -21,13 +21,14 @@ from torch.utils.data import DataLoader from transformers import PreTrainedModel, PreTrainedTokenizer, PreTrainedTokenizerBase, PreTrainedTokenizerFast +from trl import IterativeConfig + from ..core import PPODecorators, set_seed -from . import IterativeConfig, RunningMoments class IterativeTrainer: """ - The IterativeTrainer can be used to finetune models with methods that require some steps between optimization. + The IterativeTrainer can be used to finetune models with methods that requires some steps between optimization. Attributes: **config** (`IterativeConfig`) -- Configuration object for IterativeTrainer. @@ -49,7 +50,7 @@ def __init__( model: PreTrainedModel = None, tokenizer: PreTrainedTokenizerBase = None, optimizer: Optional[torch.optim.Optimizer] = None, - data_collator: Optional[Callable] = None, + data_collator: Callable = None, ): """ Initialize IterativeTrainer. @@ -67,7 +68,7 @@ def __init__( Data collator function. """ - super().__init__(config) + self.config = config # initial seed for reproducible experiments set_seed(config.seed) @@ -133,7 +134,6 @@ def __init__( PPODecorators.optimize_cuda_cache = self.config.optimize_cuda_cache - self.running = RunningMoments(self.accelerator) self.is_encoder_decoder = hasattr(self.model, "is_encoder_decoder") def prepare_model_inputs(self, input_ids: torch.Tensor, attention_mask: torch.Tensor, labels: torch.Tensor): From 2836807774fc25c319feb29e7dc8a9f532ed355c Mon Sep 17 00:00:00 2001 From: Gaetan LOPEZ Date: Mon, 4 Sep 2023 19:49:59 -0400 Subject: [PATCH 06/38] init --- trl/trainer/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/trl/trainer/__init__.py b/trl/trainer/__init__.py index 4d98135a1d..ad072763ab 100644 --- a/trl/trainer/__init__.py +++ b/trl/trainer/__init__.py @@ -36,6 +36,8 @@ from .ddpo_trainer import DDPOTrainer from .dpo_trainer import DPOTrainer +from .iterative_config import IterativeConfig +from .iterative_trainer import IterativeTrainer from .ppo_config import PPOConfig from .ppo_trainer import PPOTrainer from .reward_trainer import RewardTrainer, compute_accuracy From 3063a2fab0306303d4692bec01e055ca3dca037c Mon Sep 17 00:00:00 2001 From: Gaetan LOPEZ Date: Mon, 4 Sep 2023 19:55:37 -0400 Subject: [PATCH 07/38] fix typo --- trl/trainer/iterative_trainer.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/trl/trainer/iterative_trainer.py b/trl/trainer/iterative_trainer.py index d9f1764f17..a58ac7e6b9 100644 --- a/trl/trainer/iterative_trainer.py +++ b/trl/trainer/iterative_trainer.py @@ -21,9 +21,8 @@ from torch.utils.data import DataLoader from transformers import PreTrainedModel, PreTrainedTokenizer, PreTrainedTokenizerBase, PreTrainedTokenizerFast -from trl import IterativeConfig - from ..core import PPODecorators, set_seed +from . 
import IterativeConfig class IterativeTrainer: From 23888e14fbef860071dffb77d5490989affdf538 Mon Sep 17 00:00:00 2001 From: Gaetan LOPEZ Date: Mon, 4 Sep 2023 20:11:02 -0400 Subject: [PATCH 08/38] fix init typo --- trl/__init__.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/trl/__init__.py b/trl/__init__.py index 39cd0ffd32..60389b5a59 100644 --- a/trl/__init__.py +++ b/trl/__init__.py @@ -12,7 +12,16 @@ PreTrainedModelWrapper, create_reference_model, ) -from .trainer import DataCollatorForCompletionOnlyLM, DPOTrainer, PPOConfig, PPOTrainer, RewardTrainer, SFTTrainer +from .trainer import ( + DataCollatorForCompletionOnlyLM, + DPOTrainer, + IterativeConfig, + IterativeTrainer, + PPOConfig, + PPOTrainer, + RewardTrainer, + SFTTrainer, +) if is_diffusers_available(): From 3705578f770a483bf7c6146f449169d63a1ae892 Mon Sep 17 00:00:00 2001 From: Gaetan LOPEZ Date: Thu, 14 Sep 2023 22:23:59 -0400 Subject: [PATCH 09/38] adding loggings and safety checker --- trl/trainer/iterative_trainer.py | 129 +++++++++++++++++++++++++------ 1 file changed, 105 insertions(+), 24 deletions(-) diff --git a/trl/trainer/iterative_trainer.py b/trl/trainer/iterative_trainer.py index a58ac7e6b9..cb62662095 100644 --- a/trl/trainer/iterative_trainer.py +++ b/trl/trainer/iterative_trainer.py @@ -11,7 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import Callable, List, Optional +import warnings +from typing import List, Optional import torch from accelerate import Accelerator @@ -19,7 +20,13 @@ from datasets import Dataset from torch.optim import Adam from torch.utils.data import DataLoader -from transformers import PreTrainedModel, PreTrainedTokenizer, PreTrainedTokenizerBase, PreTrainedTokenizerFast +from transformers import ( + DataCollator, + DataCollatorForLanguageModeling, + DataCollatorForSeq2Seq, + PreTrainedModel, + PreTrainedTokenizerBase, +) from ..core import PPODecorators, set_seed from . import IterativeConfig @@ -31,15 +38,15 @@ class IterativeTrainer: Attributes: **config** (`IterativeConfig`) -- Configuration object for IterativeTrainer. - **model** (`PreTrainedModel`) -- Model to be optimized, Hugging Face transformer model with a causal language modeling head. - Check the documentation of `PreTrainedModelWrapper` for more details. + **model** (`PreTrainedModel`) -- Model to be optimized, either an 'AutoModelForCausalLM' or an 'AutoModelForSeq2SeqLM'. + Check the documentation of `PreTrainedModel` for more details. **tokenizer** (`PreTrainedTokenizerBase`) -- Tokenizer to be used for encoding the data. Check the documentation of `transformers.PreTrainedTokenizer` and `transformers.PreTrainedTokenizerFast` for more details. **optimizer** (`torch.optim.Optimizer`, *optional*) -- Optimizer to be used for training. If no optimizer is provided, the trainer will create an Adam optimizer with the learning rate specified in the configuration object. - **data_collator** (DataCollatorForLanguageModeling, *optional*) -- Data collator to be used for training and + **data_collator** (Union[DataCollatorForLanguageModeling, DataCollatorForSeq2Seq], *optional*) -- Data collator to be used for training and passed along the dataloader. 
""" @@ -49,7 +56,7 @@ def __init__( model: PreTrainedModel = None, tokenizer: PreTrainedTokenizerBase = None, optimizer: Optional[torch.optim.Optimizer] = None, - data_collator: Callable = None, + data_collator: Optional[DataCollator] = None, ): """ Initialize IterativeTrainer. @@ -60,10 +67,10 @@ def __init__( model (`PreTrainedModel`): Hugging Face transformer model. tokenizer (`transformers.PreTrainedTokenizerBase`): - Hugging Face tokenizer + Hugging Face tokenizer. optimizer (Optional[`torch.optim.Optimizer`]): - Optimizer used for training. If `None`, the `Adam` is used as default. - data_collator (Optional[function]): + Optimizer used for training. If `None`, `Adam` is used as default. + data_collator (Optional['DataCollator']): Data collator function. """ @@ -79,6 +86,13 @@ def __init__( raise ValueError( f"tokenizer must be a PreTrainedTokenizerBase like a PreTrainedTokenizer or a PreTrainedTokenizerFast, got {type(tokenizer)}" ) + if not isinstance(model, (PreTrainedModel)): + raise ValueError(f"model must be a PreTrainedModel, got {type(model)}") + if not model.can_generate(): + warnings.warn( + f"The current model class {type(model)} is not compatible with `.generate()`" + "Please make sure that this is intended." + ) # Step 1: Initialize Accelerator self.accelerator = Accelerator( @@ -101,13 +115,22 @@ def __init__( self.is_encoder_decoder = hasattr(self.model, "is_encoder_decoder") self.is_peft_model = getattr(self.model, "is_peft_model", False) - if not (isinstance(tokenizer, PreTrainedTokenizer) or isinstance(tokenizer, PreTrainedTokenizerFast)): - raise ValueError( - "tokenizer must be a transformers.PreTrainedTokenizer or transformers.PreTrainedTokenizerFast" - ) self.tokenizer = tokenizer - self.data_collator = data_collator + if data_collator is None: + if self.is_encoder_decoder: + warnings.warn( + "No data collator is provided. Using 'DataCollatorForSeq2Seq' with" + "'labels_pad_token_id' set to '-100' and 'pad_to_multiple_of' set to 8." + ) + self.data_collator = DataCollatorForSeq2Seq( + tokenizer, model=self.model, label_pad_token_id=-100, pad_to_multiple_of=8 + ) + else: + warnings.warn("No data collator is provided. 
Using 'DataCollatorForLanguageModeling'") + self.data_collator = DataCollatorForLanguageModeling(self.tokenizer, mlm=False) + else: + self.data_collator = data_collator if optimizer is None: self.optimizer = Adam( @@ -117,10 +140,8 @@ def __init__( else: self.optimizer = optimizer - (self.model, self.optimizer, self.data_collator,) = self.accelerator.prepare( - self.model, - self.optimizer, - self.data_collator, + (self.model, self.optimizer, self.data_collator) = self.accelerator.prepare( + self.model, self.optimizer, self.data_collator ) self.is_distributed = self.accelerator.distributed_type == "MULTI_GPU" @@ -131,9 +152,10 @@ def __init__( else: self.current_device = torch.device("cuda:0") - PPODecorators.optimize_cuda_cache = self.config.optimize_cuda_cache + # init the current step + self.current_step = 0 - self.is_encoder_decoder = hasattr(self.model, "is_encoder_decoder") + PPODecorators.optimize_cuda_cache = self.config.optimize_cuda_cache def prepare_model_inputs(self, input_ids: torch.Tensor, attention_mask: torch.Tensor, labels: torch.Tensor): if self.is_encoder_decoder: @@ -144,14 +166,14 @@ def prepare_model_inputs(self, input_ids: torch.Tensor, attention_mask: torch.Te ] ).to(self.model.device) + input_data.pop("decoder_input_ids", None) # This is directly computed inside the model + else: input_data = self.data_collator( [{"input_ids": ids, "attention_mask": torch.ones_like(ids)} for ids in labels] ).to(self.model.device) - input_data.pop("decoder_input_ids", None) # This is directly computed inside the model - - return input_data + return input_data def compute_loss(self, model, inputs, return_outputs=False): """ @@ -172,6 +194,31 @@ def compute_loss(self, model, inputs, return_outputs=False): return (loss, outputs) if return_outputs else loss + @staticmethod + def _step_safety_checker( + input_ids: List[torch.LongTensor], attention_mask: List[torch.LongTensor], labels: List[torch.LongTensor] + ): + """ + Check if the input data is valid for training. + + Args: + input_ids (List[`torch.LongTensor`]): + List of tensors containing the input_ids + attention_mask (List[`torch.LongTensor`]): + List of tensors containing the attention_mask + labels (List[`torch.FloatTensor`]): + List of tensors containing the labels + Returns: + `tuple`: The input processed data. + """ + for name, tensor_list in zip(["input_ids", "attention_mask", "labels"], [input_ids, attention_mask, labels]): + if not isinstance(tensor_list, list): + raise ValueError(f"{name} must be a list of tensors - got {type(tensor_list)}") + if not isinstance(tensor_list[0], torch.Tensor): + raise ValueError(f"Elements in {name} must be tensors - got {type(tensor_list[0])}") + + return input_ids, attention_mask, labels + @PPODecorators.empty_cuda_cache() def step( self, input_ids: List[torch.LongTensor], attention_mask: List[torch.LongTensor], labels: List[torch.LongTensor] @@ -182,7 +229,7 @@ def step( input_ids (List[`torch.LongTensor`]): List of tensors containing the input_ids attention_mask (List[`torch.LongTensor`]): - List of tensors containing the attenton_mask + List of tensors containing the attention_mask labels (List[`torch.FloatTensor`]): List of tensors containing the labels (if set to None, will default to input_ids) Returns: @@ -190,6 +237,12 @@ def step( """ self.model.train() + if labels is None: + warnings.warn("No labels are provided. 
Setting labels to input_ids") + labels = input_ids + + input_ids, attention_mask, labels = self._step_safety_checker(input_ids, attention_mask, labels) + model_inputs = self.prepare_model_inputs(input_ids, attention_mask, labels) model_inputs_names = list(model_inputs.keys()) @@ -230,3 +283,31 @@ def collator(data): all_stats.append(dict(loss=dict(total=loss.detach()))) return all_stats + + def log_stats( + self, + stats: dict, + ): + """ + A function that logs all the training stats. + + Args: + stats (dict[str, Any]): + A dictionary of training stats. + """ + # Log only if we are in the main process + if self.accelerator.is_main_process: + logs = {} + + logs.update(stats) + + # manually cast in fp32 for bf16 torch tensors + for k, v in logs.items(): + if isinstance(v, torch.Tensor) and v.dtype == torch.bfloat16: + logs[k] = v.float() + + if self.config.log_with == "tensorboard": + # update the current step + self.current_step += 1 + + self.accelerator.log(logs, step=self.current_step if self.config.log_with == "tensorboard" else None) From aa842c3e3effd97b3281dec32515d5dddda54709 Mon Sep 17 00:00:00 2001 From: Gaetan LOPEZ Date: Thu, 14 Sep 2023 23:19:31 -0400 Subject: [PATCH 10/38] fixed minor issues --- trl/trainer/iterative_config.py | 2 +- trl/trainer/iterative_trainer.py | 46 +++++++++++++++++++++++--------- 2 files changed, 34 insertions(+), 14 deletions(-) diff --git a/trl/trainer/iterative_config.py b/trl/trainer/iterative_config.py index 94eef16e7c..0c4e505a94 100644 --- a/trl/trainer/iterative_config.py +++ b/trl/trainer/iterative_config.py @@ -32,7 +32,7 @@ class IterativeConfig(object): metadata={"help": "Name of model to use - used only for tracking purposes"}, ) step_batch_size: Optional[int] = field( - default=256, metadata={"help": "Number of samples per optimisation step inside the step function"} + default=32, metadata={"help": "Number of samples per optimisation step inside the step function"} ) learning_rate: Optional[float] = field(default=1e-5, metadata={"help": "Adam learning rate"}) gradient_accumulation_steps: Optional[int] = field( diff --git a/trl/trainer/iterative_trainer.py b/trl/trainer/iterative_trainer.py index cb62662095..8f9672c1d6 100644 --- a/trl/trainer/iterative_trainer.py +++ b/trl/trainer/iterative_trainer.py @@ -158,11 +158,14 @@ def __init__( PPODecorators.optimize_cuda_cache = self.config.optimize_cuda_cache def prepare_model_inputs(self, input_ids: torch.Tensor, attention_mask: torch.Tensor, labels: torch.Tensor): + if attention_mask is None: + attention_mask = [torch.ones_like(ids) for ids in input_ids] + if self.is_encoder_decoder: input_data = self.data_collator( [ - {"input_ids": i, "attention_mask": a, "labels": l} - for i, a, l in zip(input_ids, attention_mask, labels) + {"input_ids": ids, "attention_mask": att, "labels": lab} + for ids, att, lab in zip(input_ids, attention_mask, labels) ] ).to(self.model.device) @@ -170,7 +173,7 @@ def prepare_model_inputs(self, input_ids: torch.Tensor, attention_mask: torch.Te else: input_data = self.data_collator( - [{"input_ids": ids, "attention_mask": torch.ones_like(ids)} for ids in labels] + [{"input_ids": ids, "attention_mask": att} for ids, att in zip(input_ids, attention_mask)] ).to(self.model.device) return input_data @@ -211,26 +214,38 @@ def _step_safety_checker( Returns: `tuple`: The input processed data. 
""" - for name, tensor_list in zip(["input_ids", "attention_mask", "labels"], [input_ids, attention_mask, labels]): - if not isinstance(tensor_list, list): - raise ValueError(f"{name} must be a list of tensors - got {type(tensor_list)}") - if not isinstance(tensor_list[0], torch.Tensor): - raise ValueError(f"Elements in {name} must be tensors - got {type(tensor_list[0])}") + if attention_mask is None: + for name, tensor_list in zip(["input_ids", "labels"], [input_ids, labels]): + if not isinstance(tensor_list, list): + raise ValueError(f"{name} must be a list of tensors - got {type(tensor_list)}") + if not isinstance(tensor_list[0], torch.Tensor): + raise ValueError(f"Elements in {name} must be tensors - got {type(tensor_list[0])}") + else: + for name, tensor_list in zip( + ["input_ids", "attention_mask", "labels"], [input_ids, attention_mask, labels] + ): + if not isinstance(tensor_list, list): + raise ValueError(f"{name} must be a list of tensors - got {type(tensor_list)}") + if not isinstance(tensor_list[0], torch.Tensor): + raise ValueError(f"Elements in {name} must be tensors - got {type(tensor_list[0])}") return input_ids, attention_mask, labels @PPODecorators.empty_cuda_cache() def step( - self, input_ids: List[torch.LongTensor], attention_mask: List[torch.LongTensor], labels: List[torch.LongTensor] + self, + input_ids: List[torch.LongTensor], + attention_mask: Optional[List[torch.LongTensor]], + labels: Optional[List[torch.LongTensor]], ): """ Run an optimisation step given a list of input_ids, attention_mask, and labels. Args: input_ids (List[`torch.LongTensor`]): List of tensors containing the input_ids - attention_mask (List[`torch.LongTensor`]): + attention_mask (List[`torch.LongTensor`], , *optional*): List of tensors containing the attention_mask - labels (List[`torch.FloatTensor`]): + labels (List[`torch.FloatTensor`], *optional*): List of tensors containing the labels (if set to None, will default to input_ids) Returns: `dict[str, Any]`: A summary of the training statistics @@ -238,8 +253,13 @@ def step( self.model.train() if labels is None: - warnings.warn("No labels are provided. Setting labels to input_ids") - labels = input_ids + if self.is_encoder_decoder: + raise ValueError( + "No labels are provided. When using an encoder-decoder architecture," "labels must be passed." + ) + else: + warnings.warn("No labels are provided. Setting labels to input_ids") + labels = input_ids input_ids, attention_mask, labels = self._step_safety_checker(input_ids, attention_mask, labels) From f2673e79cb9a9ddbbdb88983045c2be599861504 Mon Sep 17 00:00:00 2001 From: Gaetan LOPEZ Date: Thu, 14 Sep 2023 23:21:00 -0400 Subject: [PATCH 11/38] doc --- docs/source/iterative_trainer.mdx | 55 +++++++++++++++++++++++++++++++ docs/source/trainer.mdx | 12 ++++--- 2 files changed, 63 insertions(+), 4 deletions(-) create mode 100644 docs/source/iterative_trainer.mdx diff --git a/docs/source/iterative_trainer.mdx b/docs/source/iterative_trainer.mdx new file mode 100644 index 0000000000..ae7e78b87c --- /dev/null +++ b/docs/source/iterative_trainer.mdx @@ -0,0 +1,55 @@ +# Iterative Trainer + +Iterative fine-tuning is a training method that enables to perform custom actions (generation and filtering for example) between optimization steps. In TRL we provide an easy-to-use API to fine-tune your models in an iterative way in just a few lines of code. + +## Usage + +To get started quickly, instantiate an instance of the class with an 'IterativeConfig', a model, and a tokenizer. 
+ +```python + +model = AutoModelForCausalLM.from_pretrained(model_name) +tokenizer = AutoTokenizer.from_pretrained(model_name) +tokenizer.pad_token = tokenizer.eos_token + +config = IterativeConfig( + model_name=model_name, + log_with=log_with, + project_kwargs={"logging_dir":logging_dir} +) + +trainer = IterativeTrainer( + config, + model, + tokenizer +) + +``` + +And assuming you have a list of tensors of input_ids and attention_mask, you can fine-tune your models on those samples calling the step method + +```python + +trainer.step(input_ids, attention_mask, None) + +``` + +For causal language models, labels will automatically be created from input_ids. When using sequence to sequence models you will have to provide your own labels. +The default step batch size is 32, but you can change it at the time of instance initialization of the 'IterativeConfig' like so + +```python + +config = IterativeConfig( + model_name=model_name, + step_batch_size=step_batch_size, + log_with=log_with, + project_kwargs={"logging_dir":logging_dir} +) + +``` + +## IterativeTrainer + +[[autodoc]] IterativeTrainer + +[[autodoc]] IterativeConfig diff --git a/docs/source/trainer.mdx b/docs/source/trainer.mdx index bec27c7970..bddf75bdd4 100644 --- a/docs/source/trainer.mdx +++ b/docs/source/trainer.mdx @@ -12,10 +12,6 @@ We also support a `RewardTrainer` that can be used to train a reward model. [[autodoc]] PPOTrainer -## RewardConfig - -[[autodoc]] RewardConfig - ## RewardTrainer [[autodoc]] RewardTrainer @@ -36,6 +32,14 @@ We also support a `RewardTrainer` that can be used to train a reward model. [[autodoc]] DDPOTrainer +## IterativeTrainer + +[[autodoc]] IterativeTrainer + +## IterativeConfig + +[[autodoc]] IterativeConfig + ## set_seed [[autodoc]] set_seed From 2de238820cb969806df013cd9ca36d9e96bbb999 Mon Sep 17 00:00:00 2001 From: Gaetan LOPEZ Date: Thu, 14 Sep 2023 23:29:31 -0400 Subject: [PATCH 12/38] table of contents update --- docs/source/_toctree.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml index 11795be496..938b479b95 100644 --- a/docs/source/_toctree.yml +++ b/docs/source/_toctree.yml @@ -29,6 +29,8 @@ title: DPO Trainer - local: ddpo_trainer title: Denoising Diffusion Policy Optimization + - local: iterative_trainer + title: Iterative Trainer - local: text_environments title: Text Environments title: API From b61cc9ae12bd9bcd9634b5e2038a695170e1a4e5 Mon Sep 17 00:00:00 2001 From: Gaetan LOPEZ Date: Mon, 18 Sep 2023 18:39:44 -0400 Subject: [PATCH 13/38] add test for seq2seq2 models --- docs/source/trainer.mdx | 4 ++++ tests/test_iterative_trainer.py | 28 ++++++++++++++++++++++------ 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/docs/source/trainer.mdx b/docs/source/trainer.mdx index bddf75bdd4..78c4013ea4 100644 --- a/docs/source/trainer.mdx +++ b/docs/source/trainer.mdx @@ -12,6 +12,10 @@ We also support a `RewardTrainer` that can be used to train a reward model. 
[[autodoc]] PPOTrainer +## RewardConfig + +[[autodoc]] RewardConfig + ## RewardTrainer [[autodoc]] RewardTrainer diff --git a/tests/test_iterative_trainer.py b/tests/test_iterative_trainer.py index 85be6ed248..8117592cfb 100644 --- a/tests/test_iterative_trainer.py +++ b/tests/test_iterative_trainer.py @@ -15,7 +15,8 @@ import torch from datasets import Dataset -from transformers import AutoModelForCausalLM, AutoTokenizer, DataCollatorForLanguageModeling +from parameterized import parameterized +from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer from trl import IterativeConfig, IterativeTrainer @@ -28,6 +29,11 @@ def setUpClass(cls): cls.tokenizer = AutoTokenizer.from_pretrained(cls.model_id) cls.tokenizer.pad_token = cls.tokenizer.eos_token + # get t5 as seq2seq example: + model_id = "trl-internal-testing/tiny-T5ForConditionalGeneration-correct-vocab" + cls.t5_model = AutoModelForSeq2SeqLM.from_pretrained(model_id) + cls.t5_tokenizer = AutoTokenizer.from_pretrained(model_id) + def _init_dummy_dataset(self): dummy_dataset_dict = { "input_ids": [torch.tensor([5303, 3621]), torch.tensor([3666, 1438, 318]), torch.tensor([5303, 3621])], @@ -43,16 +49,26 @@ def setUp(self): # initialize trainer self.iterative_config = IterativeConfig(step_batch_size=2, log_with=None) self.model.train() - self.data_collator = DataCollatorForLanguageModeling(self.tokenizer, mlm=False) return super().setUp() - def test_iterative_step(self): + @parameterized.expand( + [ + ["gpt2"], + ["t5"], + ] + ) + def test_iterative_step(self, name): # initialize dataset dummy_dataset = self._init_dummy_dataset() - iterative_trainer = IterativeTrainer( - config=self.iterative_config, model=self.model, tokenizer=self.tokenizer, data_collator=self.data_collator - ) + if name == "gpt2": + model = self.model + tokenizer = self.tokenizer + else: + model = self.t5_model + tokenizer = self.t5_tokenizer + + iterative_trainer = IterativeTrainer(config=self.iterative_config, model=model, tokenizer=tokenizer) iterative_trainer.step(dummy_dataset["input_ids"], dummy_dataset["attention_mask"], dummy_dataset["labels"]) From a8eee4b3c6837b9328ac5070bc094500dfb942cb Mon Sep 17 00:00:00 2001 From: Gaetan LOPEZ Date: Mon, 18 Sep 2023 18:40:35 -0400 Subject: [PATCH 14/38] change year --- trl/trainer/iterative_config.py | 2 +- trl/trainer/iterative_trainer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/trl/trainer/iterative_config.py b/trl/trainer/iterative_config.py index 0c4e505a94..4b65ae2ff5 100644 --- a/trl/trainer/iterative_config.py +++ b/trl/trainer/iterative_config.py @@ -1,4 +1,4 @@ -# Copyright 2022 The HuggingFace Team. All rights reserved. +# Copyright 2023 The HuggingFace Team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/trl/trainer/iterative_trainer.py b/trl/trainer/iterative_trainer.py index 8f9672c1d6..9b85267433 100644 --- a/trl/trainer/iterative_trainer.py +++ b/trl/trainer/iterative_trainer.py @@ -1,4 +1,4 @@ -# Copyright 2022 The HuggingFace Team. All rights reserved. +# Copyright 2023 The HuggingFace Team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
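The series up to this point fixes the tensor-based API in place (PATCH 15 below renames the classes to `IterativeSFTConfig`/`IterativeSFTTrainer` and adds text inputs). As a rough illustration (not part of the patch series), the "custom actions between optimization steps" loop that the class docstring and the PATCH 11 documentation describe might be driven as follows; the `prompts` list and the `keep_best` filter are placeholder assumptions, while the `IterativeConfig`/`IterativeTrainer`/`step()` calls follow the documented API as of PATCH 14:

```python
# Illustrative sketch (not part of the patch series): a generate -> filter -> train
# loop over the step() API as it stands after PATCH 14.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import IterativeConfig, IterativeTrainer

model_name = "gpt2"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

config = IterativeConfig(model_name=model_name, step_batch_size=2, log_with=None)
trainer = IterativeTrainer(config, model, tokenizer)

prompts = ["The capital of France is", "TRL is a library that"]  # placeholder data


def keep_best(texts):
    # Placeholder filter: a real loop would score completions with a reward
    # model or heuristic and keep only the ones worth training on.
    return texts


for _ in range(3):
    # Custom action between optimization steps: sample completions from the model.
    completions = []
    for prompt in prompts:
        ids = tokenizer(prompt, return_tensors="pt").input_ids.to(trainer.current_device)
        out = trainer.model.generate(ids, max_new_tokens=16, pad_token_id=tokenizer.eos_token_id)
        completions.append(tokenizer.decode(out[0], skip_special_tokens=True))

    # Re-tokenize the kept samples into the lists of tensors that step() expects.
    kept = tokenizer(keep_best(completions))
    input_ids = [torch.tensor(x) for x in kept["input_ids"]]
    attention_mask = [torch.tensor(x) for x in kept["attention_mask"]]

    # For a causal LM, passing labels=None falls back to labels = input_ids.
    trainer.step(input_ids, attention_mask, None)
```

Once PATCH 15 lands, the re-tokenization step could instead pass the kept strings directly, e.g. `trainer.step(texts=keep_best(completions))`.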
From 917c2eb05271c28d3a427feac160fb508b3bcbf2 Mon Sep 17 00:00:00 2001 From: Gaetan LOPEZ Date: Sun, 8 Oct 2023 14:56:09 -0400 Subject: [PATCH 15/38] adding text as step input --- docs/source/_toctree.yml | 4 +- ..._trainer.mdx => iterative_sft_trainer.mdx} | 41 +++-- docs/source/trainer.mdx | 8 +- ...ainer.py => test_iterative_sft_trainer.py} | 44 ++++-- trl/__init__.py | 4 +- trl/trainer/__init__.py | 4 +- ...tive_config.py => iterative_sft_config.py} | 4 +- ...ve_trainer.py => iterative_sft_trainer.py} | 147 ++++++++++++------ 8 files changed, 179 insertions(+), 77 deletions(-) rename docs/source/{iterative_trainer.mdx => iterative_sft_trainer.mdx} (54%) rename tests/{test_iterative_trainer.py => test_iterative_sft_trainer.py} (60%) rename trl/trainer/{iterative_config.py => iterative_sft_config.py} (97%) rename trl/trainer/{iterative_trainer.py => iterative_sft_trainer.py} (66%) diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml index 938b479b95..3115fab292 100644 --- a/docs/source/_toctree.yml +++ b/docs/source/_toctree.yml @@ -29,8 +29,8 @@ title: DPO Trainer - local: ddpo_trainer title: Denoising Diffusion Policy Optimization - - local: iterative_trainer - title: Iterative Trainer + - local: iterative_sft_trainer + title: Iterative Supervised Fine-Tuning - local: text_environments title: Text Environments title: API diff --git a/docs/source/iterative_trainer.mdx b/docs/source/iterative_sft_trainer.mdx similarity index 54% rename from docs/source/iterative_trainer.mdx rename to docs/source/iterative_sft_trainer.mdx index ae7e78b87c..908d2a9013 100644 --- a/docs/source/iterative_trainer.mdx +++ b/docs/source/iterative_sft_trainer.mdx @@ -1,10 +1,10 @@ -# Iterative Trainer +# Iterative Supervised Fine-tuning Trainer Iterative fine-tuning is a training method that enables to perform custom actions (generation and filtering for example) between optimization steps. In TRL we provide an easy-to-use API to fine-tune your models in an iterative way in just a few lines of code. ## Usage -To get started quickly, instantiate an instance of the class with an 'IterativeConfig', a model, and a tokenizer. +To get started quickly, instantiate an instance of the class with an 'IterativeSFTConfig', a model, and a tokenizer. ```python @@ -12,13 +12,13 @@ model = AutoModelForCausalLM.from_pretrained(model_name) tokenizer = AutoTokenizer.from_pretrained(model_name) tokenizer.pad_token = tokenizer.eos_token -config = IterativeConfig( +config = IterativeSFTConfig( model_name=model_name, log_with=log_with, project_kwargs={"logging_dir":logging_dir} ) -trainer = IterativeTrainer( +trainer = IterativeSFTTrainer( config, model, tokenizer @@ -26,20 +26,39 @@ trainer = IterativeTrainer( ``` -And assuming you have a list of tensors of input_ids and attention_mask, you can fine-tune your models on those samples calling the step method +You have the choice to either provide a list of strings or a list of tensors to the step function. + +#### Using a list of tensors as input: ```python -trainer.step(input_ids, attention_mask, None) +inputs = { + "input_ids": input_ids, + "attention_mask": attention_mask +} + +trainer.step(**inputs) + +``` + +#### Using a list of strings as input: + +```python + +inputs = { + "texts": texts +} + +trainer.step(**inputs) ``` -For causal language models, labels will automatically be created from input_ids. When using sequence to sequence models you will have to provide your own labels. 
-The default step batch size is 32, but you can change it at the time of instance initialization of the 'IterativeConfig' like so +For causal language models, labels will automatically be created from input_ids or from texts. When using sequence to sequence models you will have to provide your own labels or text_labels. +The default step batch size is 32, but you can change it at the time of instance initialization of the 'IterativeSFTConfig' like so ```python -config = IterativeConfig( +config = IterativeSFTConfig( model_name=model_name, step_batch_size=step_batch_size, log_with=log_with, @@ -50,6 +69,6 @@ config = IterativeConfig( ## IterativeTrainer -[[autodoc]] IterativeTrainer +[[autodoc]] IterativeSFTTrainer -[[autodoc]] IterativeConfig +[[autodoc]] IterativeSFTConfig diff --git a/docs/source/trainer.mdx b/docs/source/trainer.mdx index 78c4013ea4..599b602016 100644 --- a/docs/source/trainer.mdx +++ b/docs/source/trainer.mdx @@ -36,13 +36,13 @@ We also support a `RewardTrainer` that can be used to train a reward model. [[autodoc]] DDPOTrainer -## IterativeTrainer +## IterativeSFTTrainer -[[autodoc]] IterativeTrainer +[[autodoc]] IterativeSFTTrainer -## IterativeConfig +## IterativeSFTConfig -[[autodoc]] IterativeConfig +[[autodoc]] IterativeSFTConfig ## set_seed diff --git a/tests/test_iterative_trainer.py b/tests/test_iterative_sft_trainer.py similarity index 60% rename from tests/test_iterative_trainer.py rename to tests/test_iterative_sft_trainer.py index 8117592cfb..41c73065d6 100644 --- a/tests/test_iterative_trainer.py +++ b/tests/test_iterative_sft_trainer.py @@ -18,7 +18,7 @@ from parameterized import parameterized from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer -from trl import IterativeConfig, IterativeTrainer +from trl import IterativeSFTConfig, IterativeSFTTrainer class IterativeTrainerTester(unittest.TestCase): @@ -34,7 +34,7 @@ def setUpClass(cls): cls.t5_model = AutoModelForSeq2SeqLM.from_pretrained(model_id) cls.t5_tokenizer = AutoTokenizer.from_pretrained(model_id) - def _init_dummy_dataset(self): + def _init_tensor_dummy_dataset(self): dummy_dataset_dict = { "input_ids": [torch.tensor([5303, 3621]), torch.tensor([3666, 1438, 318]), torch.tensor([5303, 3621])], "attention_mask": [torch.tensor([1, 1]), torch.tensor([1, 1, 1]), torch.tensor([1, 1])], @@ -45,32 +45,56 @@ def _init_dummy_dataset(self): dummy_dataset.set_format("torch") return dummy_dataset + def _init_textual_dummy_dataset(self): + dummy_dataset_dict = { + "texts": ["Testing the IterativeSFTTrainer.", "This is a test of the IterativeSFTTrainer"], + "texts_labels": ["Testing the IterativeSFTTrainer.", "This is a test of the IterativeSFTTrainer"], + } + + dummy_dataset = Dataset.from_dict(dummy_dataset_dict) + dummy_dataset.set_format("torch") + return dummy_dataset + def setUp(self): # initialize trainer - self.iterative_config = IterativeConfig(step_batch_size=2, log_with=None) + self.iterative_config = IterativeSFTConfig(step_batch_size=2, log_with=None) self.model.train() return super().setUp() @parameterized.expand( [ - ["gpt2"], - ["t5"], + ["gpt2", "tensor"], + ["gpt2", "text"], + ["t5", "tensor"], + ["t5", "text"], ] ) - def test_iterative_step(self, name): + def test_iterative_step_from_tensor(self, model_name, input_name): # initialize dataset - dummy_dataset = self._init_dummy_dataset() + if input_name == "tensor": + dummy_dataset = self._init_tensor_dummy_dataset() + inputs = { + "input_ids": dummy_dataset["input_ids"], + "attention_mask": 
dummy_dataset["attention_mask"], + "labels": dummy_dataset["labels"], + } + else: + dummy_dataset = self._init_textual_dummy_dataset() + inputs = { + "texts": dummy_dataset["texts"], + "texts_labels": dummy_dataset["texts_labels"], + } - if name == "gpt2": + if model_name == "gpt2": model = self.model tokenizer = self.tokenizer else: model = self.t5_model tokenizer = self.t5_tokenizer - iterative_trainer = IterativeTrainer(config=self.iterative_config, model=model, tokenizer=tokenizer) + iterative_trainer = IterativeSFTTrainer(config=self.iterative_config, model=model, tokenizer=tokenizer) - iterative_trainer.step(dummy_dataset["input_ids"], dummy_dataset["attention_mask"], dummy_dataset["labels"]) + iterative_trainer.step(**inputs) for param in iterative_trainer.model.parameters(): assert param.grad is not None diff --git a/trl/__init__.py b/trl/__init__.py index 0ea8332e6f..16388fd398 100644 --- a/trl/__init__.py +++ b/trl/__init__.py @@ -15,8 +15,8 @@ from .trainer import ( DataCollatorForCompletionOnlyLM, DPOTrainer, - IterativeConfig, - IterativeTrainer, + IterativeSFTConfig, + IterativeSFTTrainer, PPOConfig, PPOTrainer, RewardConfig, diff --git a/trl/trainer/__init__.py b/trl/trainer/__init__.py index edf888b95a..9cf852144d 100644 --- a/trl/trainer/__init__.py +++ b/trl/trainer/__init__.py @@ -36,8 +36,8 @@ from .ddpo_trainer import DDPOTrainer from .dpo_trainer import DPOTrainer -from .iterative_config import IterativeConfig -from .iterative_trainer import IterativeTrainer +from .iterative_sft_config import IterativeSFTConfig +from .iterative_sft_trainer import IterativeSFTTrainer from .ppo_config import PPOConfig from .ppo_trainer import PPOTrainer from .reward_trainer import RewardTrainer, compute_accuracy diff --git a/trl/trainer/iterative_config.py b/trl/trainer/iterative_sft_config.py similarity index 97% rename from trl/trainer/iterative_config.py rename to trl/trainer/iterative_sft_config.py index 4b65ae2ff5..e5b4003729 100644 --- a/trl/trainer/iterative_config.py +++ b/trl/trainer/iterative_sft_config.py @@ -18,9 +18,9 @@ @dataclass -class IterativeConfig(object): +class IterativeSFTConfig(object): """ - Configuration class for PPOTrainer + Configuration class for IterativeSFTTrainer """ task_name: Optional[str] = field( diff --git a/trl/trainer/iterative_trainer.py b/trl/trainer/iterative_sft_trainer.py similarity index 66% rename from trl/trainer/iterative_trainer.py rename to trl/trainer/iterative_sft_trainer.py index 9b85267433..c5b6087ad0 100644 --- a/trl/trainer/iterative_trainer.py +++ b/trl/trainer/iterative_sft_trainer.py @@ -29,12 +29,13 @@ ) from ..core import PPODecorators, set_seed -from . import IterativeConfig +from ..import_utils import is_torch_greater_2_0 +from . import IterativeSFTConfig -class IterativeTrainer: +class IterativeSFTTrainer: """ - The IterativeTrainer can be used to finetune models with methods that requires some steps between optimization. + The IterativeSFTTrainer can be used to finetune models with methods that requires some steps between optimization. Attributes: **config** (`IterativeConfig`) -- Configuration object for IterativeTrainer. @@ -48,18 +49,20 @@ class IterativeTrainer: object. **data_collator** (Union[DataCollatorForLanguageModeling, DataCollatorForSeq2Seq], *optional*) -- Data collator to be used for training and passed along the dataloader. + **lr_scheduler** (`torch.optim.lr_scheduler`, *optional*) -- Learning rate scheduler to be used for training. 
""" def __init__( self, - config: IterativeConfig = None, + config: IterativeSFTConfig = None, model: PreTrainedModel = None, tokenizer: PreTrainedTokenizerBase = None, optimizer: Optional[torch.optim.Optimizer] = None, data_collator: Optional[DataCollator] = None, + lr_scheduler: Optional[torch.optim.lr_scheduler._LRScheduler] = None, ): """ - Initialize IterativeTrainer. + Initialize IterativeSFTTrainer. Args: config (`IterativeConfig`): @@ -72,6 +75,8 @@ def __init__( Optimizer used for training. If `None`, `Adam` is used as default. data_collator (Optional['DataCollator']): Data collator function. + lr_scheduler (Optional[`torch.optim.lr_scheduler`]): + Learning rate scheduler used for training. """ self.config = config @@ -86,7 +91,7 @@ def __init__( raise ValueError( f"tokenizer must be a PreTrainedTokenizerBase like a PreTrainedTokenizer or a PreTrainedTokenizerFast, got {type(tokenizer)}" ) - if not isinstance(model, (PreTrainedModel)): + if not isinstance(model, PreTrainedModel): raise ValueError(f"model must be a PreTrainedModel, got {type(model)}") if not model.can_generate(): warnings.warn( @@ -140,8 +145,21 @@ def __init__( else: self.optimizer = optimizer - (self.model, self.optimizer, self.data_collator) = self.accelerator.prepare( - self.model, self.optimizer, self.data_collator + self.lr_scheduler = lr_scheduler + if self.lr_scheduler is not None: + lr_scheduler_class = ( + torch.optim.lr_scheduler._LRScheduler + if not is_torch_greater_2_0() + else torch.optim.lr_scheduler.LRScheduler + ) + + if not isinstance(self.lr_scheduler, lr_scheduler_class): + raise ValueError( + "lr_scheduler must be a torch.optim.lr_scheduler._LRScheduler or torch.optim.lr_scheduler.LRScheduler (for torch >= 2.0)" + ) + + (self.model, self.optimizer, self.data_collator, self.lr_scheduler) = self.accelerator.prepare( + self.model, self.optimizer, self.data_collator, self.lr_scheduler ) self.is_distributed = self.accelerator.distributed_type == "MULTI_GPU" @@ -199,7 +217,7 @@ def compute_loss(self, model, inputs, return_outputs=False): @staticmethod def _step_safety_checker( - input_ids: List[torch.LongTensor], attention_mask: List[torch.LongTensor], labels: List[torch.LongTensor] + input_ids: List[torch.LongTensor], attention_mask: List[torch.LongTensor], labels: List[torch.LongTensor], texts: List[str], texts_labels: List[str] ): """ Check if the input data is valid for training. @@ -211,58 +229,97 @@ def _step_safety_checker( List of tensors containing the attention_mask labels (List[`torch.FloatTensor`]): List of tensors containing the labels + texts (List[`str`]): + List of string containing the text input. + texts_labels (List[`str`]): + List of string containing the text labels. Returns: - `tuple`: The input processed data. + `tuple`: The input data. 
""" - if attention_mask is None: - for name, tensor_list in zip(["input_ids", "labels"], [input_ids, labels]): - if not isinstance(tensor_list, list): - raise ValueError(f"{name} must be a list of tensors - got {type(tensor_list)}") - if not isinstance(tensor_list[0], torch.Tensor): - raise ValueError(f"Elements in {name} must be tensors - got {type(tensor_list[0])}") + if texts is None: + if attention_mask is None: + for name, tensor_list in zip(["input_ids", "labels"], [input_ids, labels]): + if not isinstance(tensor_list, list): + raise ValueError(f"{name} must be a list of tensors - got {type(tensor_list)}") + if not isinstance(tensor_list[0], torch.Tensor): + raise ValueError(f"Elements in {name} must be tensors - got {type(tensor_list[0])}") + else: + for name, tensor_list in zip( + ["input_ids", "attention_mask", "labels"], [input_ids, attention_mask, labels] + ): + if not isinstance(tensor_list, list): + raise ValueError(f"{name} must be a list of tensors - got {type(tensor_list)}") + if not isinstance(tensor_list[0], torch.Tensor): + raise ValueError(f"Elements in {name} must be tensors - got {type(tensor_list[0])}") else: - for name, tensor_list in zip( - ["input_ids", "attention_mask", "labels"], [input_ids, attention_mask, labels] - ): - if not isinstance(tensor_list, list): - raise ValueError(f"{name} must be a list of tensors - got {type(tensor_list)}") - if not isinstance(tensor_list[0], torch.Tensor): - raise ValueError(f"Elements in {name} must be tensors - got {type(tensor_list[0])}") - - return input_ids, attention_mask, labels + if not isinstance(texts, list): + raise ValueError(f"'text' must be a list of strings - got {type(texts)}") + if not isinstance(texts[0], torch.Tensor): + raise ValueError(f"Elements in 'text' must be strings - got {type(texts[0])}") + if texts_labels is not None: + if not isinstance(texts_labels, list): + raise ValueError(f"'text_labels' must be a list of strings - got {type(texts_labels)}") + if not isinstance(texts_labels[0], torch.Tensor): + raise ValueError(f"Elements in 'text_labels' must be strings - got {type(texts_labels[0])}") + + return input_ids, attention_mask, labels, texts, texts_labels @PPODecorators.empty_cuda_cache() - def step( - self, - input_ids: List[torch.LongTensor], - attention_mask: Optional[List[torch.LongTensor]], - labels: Optional[List[torch.LongTensor]], - ): + def step(self, **kwargs): """ - Run an optimisation step given a list of input_ids, attention_mask, and labels. - Args: + Run an optimisation step given a list of input_ids, attention_mask, and labels or a list of text and text_labels. + Keyword Args: input_ids (List[`torch.LongTensor`]): - List of tensors containing the input_ids + List of tensors containing the input_ids (if not provided, text will be used) attention_mask (List[`torch.LongTensor`], , *optional*): List of tensors containing the attention_mask labels (List[`torch.FloatTensor`], *optional*): List of tensors containing the labels (if set to None, will default to input_ids) + texts (List[`torch.FloatTensor`], *optional*): + List of strings containing the text input (if not provided, input_ids will directly be used) + texts_labels (List[`torch.FloatTensor`], *optional*): + List of strings containing the text labels (if set to None, will default to text) Returns: `dict[str, Any]`: A summary of the training statistics """ self.model.train() - if labels is None: - if self.is_encoder_decoder: - raise ValueError( - "No labels are provided. 
When using an encoder-decoder architecture," "labels must be passed." + + input_ids = kwargs.get("input_ids", None) + attention_mask = kwargs.get("attention_mask", None) + labels = kwargs.get("labels", None) + texts = kwargs.get("texts", None) + texts_labels = kwargs.get("texts_labels", None) + + if input_ids is None and texts is None: + raise ValueError( + "Step should include `input_ids` or `texts` as keyword arguments." + ) + elif input_ids is not None and texts is not None: + warnings.warn( + "Both 'input_ids' and 'texts' are provided. 'input_ids' will be overwritten using inputs provided by the 'texts' keyword argument." + ) + + if ( + labels is None + and texts_labels is None + and self.is_encoder_decoder + ): + raise ValueError( + "No 'labels' or 'text_labels' are provided. When using an encoder-decoder architecture, 'labels' or 'text_labels' must be passed." ) - else: - warnings.warn("No labels are provided. Setting labels to input_ids") - labels = input_ids - - input_ids, attention_mask, labels = self._step_safety_checker(input_ids, attention_mask, labels) - + + input_ids, attention_mask, labels, texts, texts_labels = self._step_safety_checker(input_ids, attention_mask, labels, texts, texts_labels) + + if texts is not None: + input_ids = [self.tokenizer(text, return_tensors="pt")["input_ids"] for text in texts] + if texts_labels is not None: + labels = [self.tokenizer(text_labels, return_tensors="pt")["input_ids"] for text_labels in texts_labels] + + if labels is None: + warnings.warn("No labels are provided. Setting labels to input_ids") + labels = input_ids + model_inputs = self.prepare_model_inputs(input_ids, attention_mask, labels) model_inputs_names = list(model_inputs.keys()) @@ -298,6 +355,8 @@ def collator(data): self.optimizer.step() self.optimizer.zero_grad() + if self.lr_scheduler is not None: + self.lr_scheduler.step() # update stats etc all_stats.append(dict(loss=dict(total=loss.detach()))) From bae3ca388102a1dde44057b86c7a34ea05881659 Mon Sep 17 00:00:00 2001 From: Gaetan LOPEZ Date: Sun, 8 Oct 2023 15:05:43 -0400 Subject: [PATCH 16/38] precommit --- trl/trainer/iterative_sft_trainer.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/trl/trainer/iterative_sft_trainer.py b/trl/trainer/iterative_sft_trainer.py index c5b6087ad0..d10c4d2971 100644 --- a/trl/trainer/iterative_sft_trainer.py +++ b/trl/trainer/iterative_sft_trainer.py @@ -85,7 +85,7 @@ def __init__( set_seed(config.seed) # Step 0: check positional arguments validity - if not isinstance(config, IterativeConfig): + if not isinstance(config, IterativeSFTConfig): raise ValueError(f"config must be a IterativeConfig, got {type(config)}") if not isinstance(tokenizer, (PreTrainedTokenizerBase)): raise ValueError( @@ -217,7 +217,11 @@ def compute_loss(self, model, inputs, return_outputs=False): @staticmethod def _step_safety_checker( - input_ids: List[torch.LongTensor], attention_mask: List[torch.LongTensor], labels: List[torch.LongTensor], texts: List[str], texts_labels: List[str] + input_ids: List[torch.LongTensor], + attention_mask: List[torch.LongTensor], + labels: List[torch.LongTensor], + texts: List[str], + texts_labels: List[str] ): """ Check if the input data is valid for training. @@ -292,24 +296,20 @@ def step(self, **kwargs): texts_labels = kwargs.get("texts_labels", None) if input_ids is None and texts is None: - raise ValueError( - "Step should include `input_ids` or `texts` as keyword arguments." 
- ) + raise ValueError("Step should include `input_ids` or `texts` as keyword arguments.") elif input_ids is not None and texts is not None: warnings.warn( "Both 'input_ids' and 'texts' are provided. 'input_ids' will be overwritten using inputs provided by the 'texts' keyword argument." ) - if ( - labels is None - and texts_labels is None - and self.is_encoder_decoder - ): + if labels is None and texts_labels is None and self.is_encoder_decoder: raise ValueError( "No 'labels' or 'text_labels' are provided. When using an encoder-decoder architecture, 'labels' or 'text_labels' must be passed." ) - input_ids, attention_mask, labels, texts, texts_labels = self._step_safety_checker(input_ids, attention_mask, labels, texts, texts_labels) + input_ids, attention_mask, labels, texts, texts_labels = self._step_safety_checker( + input_ids, attention_mask, labels, texts, texts_labels + ) if texts is not None: input_ids = [self.tokenizer(text, return_tensors="pt")["input_ids"] for text in texts] From 173a34be0d80aa4f554e07c89ee4c371c096da00 Mon Sep 17 00:00:00 2001 From: Gaetan LOPEZ Date: Sun, 8 Oct 2023 15:08:11 -0400 Subject: [PATCH 17/38] fixing typo --- trl/trainer/iterative_sft_trainer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/trl/trainer/iterative_sft_trainer.py b/trl/trainer/iterative_sft_trainer.py index d10c4d2971..be70fa8b0a 100644 --- a/trl/trainer/iterative_sft_trainer.py +++ b/trl/trainer/iterative_sft_trainer.py @@ -304,8 +304,8 @@ def step(self, **kwargs): if labels is None and texts_labels is None and self.is_encoder_decoder: raise ValueError( - "No 'labels' or 'text_labels' are provided. When using an encoder-decoder architecture, 'labels' or 'text_labels' must be passed." - ) + "No 'labels' or 'text_labels' are provided. When using an encoder-decoder architecture, 'labels' or 'text_labels' must be passed." + ) input_ids, attention_mask, labels, texts, texts_labels = self._step_safety_checker( input_ids, attention_mask, labels, texts, texts_labels From f6e188e8aa18e8cf5d98a7fc60f7069de763dcf4 Mon Sep 17 00:00:00 2001 From: Gaetan LOPEZ Date: Sun, 8 Oct 2023 15:13:44 -0400 Subject: [PATCH 18/38] run precommit --- trl/trainer/iterative_sft_trainer.py | 29 ++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/trl/trainer/iterative_sft_trainer.py b/trl/trainer/iterative_sft_trainer.py index be70fa8b0a..5d3685d76b 100644 --- a/trl/trainer/iterative_sft_trainer.py +++ b/trl/trainer/iterative_sft_trainer.py @@ -157,7 +157,7 @@ def __init__( raise ValueError( "lr_scheduler must be a torch.optim.lr_scheduler._LRScheduler or torch.optim.lr_scheduler.LRScheduler (for torch >= 2.0)" ) - + (self.model, self.optimizer, self.data_collator, self.lr_scheduler) = self.accelerator.prepare( self.model, self.optimizer, self.data_collator, self.lr_scheduler ) @@ -217,12 +217,13 @@ def compute_loss(self, model, inputs, return_outputs=False): @staticmethod def _step_safety_checker( - input_ids: List[torch.LongTensor], - attention_mask: List[torch.LongTensor], - labels: List[torch.LongTensor], - texts: List[str], - texts_labels: List[str] + input_ids: List[torch.LongTensor], + attention_mask: List[torch.LongTensor], + labels: List[torch.LongTensor], + texts: List[str], + texts_labels: List[str], ): + """ Check if the input data is valid for training. 
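For reference, a minimal sketch of the two input layouts the kwargs-based `step` is meant to accept, mirroring the dummy datasets used in the tests (an already-constructed `trainer` is assumed; the string-type checks in the safety checker are only corrected a couple of commits further down this series):

```python
import torch

# tensor path: plain Python lists of 1-D LongTensors, one entry per sample
tensor_inputs = {
    "input_ids": [torch.tensor([5303, 3621]), torch.tensor([3666, 1438, 318])],
    "attention_mask": [torch.tensor([1, 1]), torch.tensor([1, 1, 1])],
    "labels": [torch.tensor([5303, 3621]), torch.tensor([3666, 1438, 318])],
}
trainer.step(**tensor_inputs)

# text path: plain Python lists of strings; texts_labels is optional and defaults to texts
text_inputs = {
    "texts": ["Testing the IterativeSFTTrainer.", "This is a test of the IterativeSFTTrainer"],
    "texts_labels": ["Testing the IterativeSFTTrainer.", "This is a test of the IterativeSFTTrainer"],
}
trainer.step(**text_inputs)
```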
@@ -288,38 +289,38 @@ def step(self, **kwargs): """ self.model.train() - + input_ids = kwargs.get("input_ids", None) attention_mask = kwargs.get("attention_mask", None) labels = kwargs.get("labels", None) texts = kwargs.get("texts", None) texts_labels = kwargs.get("texts_labels", None) - + if input_ids is None and texts is None: raise ValueError("Step should include `input_ids` or `texts` as keyword arguments.") elif input_ids is not None and texts is not None: warnings.warn( "Both 'input_ids' and 'texts' are provided. 'input_ids' will be overwritten using inputs provided by the 'texts' keyword argument." ) - + if labels is None and texts_labels is None and self.is_encoder_decoder: raise ValueError( "No 'labels' or 'text_labels' are provided. When using an encoder-decoder architecture, 'labels' or 'text_labels' must be passed." ) - + input_ids, attention_mask, labels, texts, texts_labels = self._step_safety_checker( input_ids, attention_mask, labels, texts, texts_labels - ) - + ) + if texts is not None: input_ids = [self.tokenizer(text, return_tensors="pt")["input_ids"] for text in texts] if texts_labels is not None: labels = [self.tokenizer(text_labels, return_tensors="pt")["input_ids"] for text_labels in texts_labels] - + if labels is None: warnings.warn("No labels are provided. Setting labels to input_ids") labels = input_ids - + model_inputs = self.prepare_model_inputs(input_ids, attention_mask, labels) model_inputs_names = list(model_inputs.keys()) From cfd835f30ea970d173a0d910185997d8321748ae Mon Sep 17 00:00:00 2001 From: Gaetan LOPEZ Date: Sun, 8 Oct 2023 15:23:23 -0400 Subject: [PATCH 19/38] fixing typo in safety checker --- trl/trainer/iterative_sft_trainer.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/trl/trainer/iterative_sft_trainer.py b/trl/trainer/iterative_sft_trainer.py index 5d3685d76b..9837427908 100644 --- a/trl/trainer/iterative_sft_trainer.py +++ b/trl/trainer/iterative_sft_trainer.py @@ -259,12 +259,12 @@ def _step_safety_checker( else: if not isinstance(texts, list): raise ValueError(f"'text' must be a list of strings - got {type(texts)}") - if not isinstance(texts[0], torch.Tensor): + if not isinstance(texts[0], str): raise ValueError(f"Elements in 'text' must be strings - got {type(texts[0])}") if texts_labels is not None: if not isinstance(texts_labels, list): raise ValueError(f"'text_labels' must be a list of strings - got {type(texts_labels)}") - if not isinstance(texts_labels[0], torch.Tensor): + if not isinstance(texts_labels[0], str): raise ValueError(f"Elements in 'text_labels' must be strings - got {type(texts_labels[0])}") return input_ids, attention_mask, labels, texts, texts_labels @@ -280,9 +280,9 @@ def step(self, **kwargs): List of tensors containing the attention_mask labels (List[`torch.FloatTensor`], *optional*): List of tensors containing the labels (if set to None, will default to input_ids) - texts (List[`torch.FloatTensor`], *optional*): + texts (List[`str`], *optional*): List of strings containing the text input (if not provided, input_ids will directly be used) - texts_labels (List[`torch.FloatTensor`], *optional*): + texts_labels (List[`str`], *optional*): List of strings containing the text labels (if set to None, will default to text) Returns: `dict[str, Any]`: A summary of the training statistics From ede0ccc93ea7ab02205fcef7e4408f4124c56c01 Mon Sep 17 00:00:00 2001 From: Gaetan LOPEZ Date: Sun, 8 Oct 2023 15:54:24 -0400 Subject: [PATCH 20/38] fix text tokenization issue --- 
trl/trainer/iterative_sft_trainer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/trl/trainer/iterative_sft_trainer.py b/trl/trainer/iterative_sft_trainer.py index 9837427908..230a1e3cb0 100644 --- a/trl/trainer/iterative_sft_trainer.py +++ b/trl/trainer/iterative_sft_trainer.py @@ -313,9 +313,9 @@ def step(self, **kwargs): ) if texts is not None: - input_ids = [self.tokenizer(text, return_tensors="pt")["input_ids"] for text in texts] + input_ids = [self.tokenizer(text, return_tensors="pt")["input_ids"][0] for text in texts] if texts_labels is not None: - labels = [self.tokenizer(text_labels, return_tensors="pt")["input_ids"] for text_labels in texts_labels] + labels = [self.tokenizer(text_labels, return_tensors="pt")["input_ids"][0] for text_labels in texts_labels] if labels is None: warnings.warn("No labels are provided. Setting labels to input_ids") From 11c69e229330ebca51a860676639ebf503356b28 Mon Sep 17 00:00:00 2001 From: Gaetan LOPEZ Date: Tue, 31 Oct 2023 21:42:50 -0400 Subject: [PATCH 21/38] add truncate and inherit from trainer --- trl/trainer/iterative_sft_config.py | 79 ----------- trl/trainer/iterative_sft_trainer.py | 191 ++++++++++++--------------- 2 files changed, 81 insertions(+), 189 deletions(-) delete mode 100644 trl/trainer/iterative_sft_config.py diff --git a/trl/trainer/iterative_sft_config.py b/trl/trainer/iterative_sft_config.py deleted file mode 100644 index e5b4003729..0000000000 --- a/trl/trainer/iterative_sft_config.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright 2023 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from dataclasses import dataclass, field -from typing import Optional - -from ..core import flatten_dict - - -@dataclass -class IterativeSFTConfig(object): - """ - Configuration class for IterativeSFTTrainer - """ - - task_name: Optional[str] = field( - default=None, - metadata={"help": "Name of task to use - used only for tracking purposes"}, - ) - model_name: Optional[str] = field( - default=None, - metadata={"help": "Name of model to use - used only for tracking purposes"}, - ) - step_batch_size: Optional[int] = field( - default=32, metadata={"help": "Number of samples per optimisation step inside the step function"} - ) - learning_rate: Optional[float] = field(default=1e-5, metadata={"help": "Adam learning rate"}) - gradient_accumulation_steps: Optional[int] = field( - default=1, metadata={"help": "The number of gradient accumulation steps"} - ) - remove_unused_columns: Optional[bool] = field( - default=True, - metadata={"help": "Remove unused columns from the dataset if `datasets.Dataset` is used"}, - ) - log_with: Optional[str] = field( - default=None, - metadata={ - "help": "Log with either 'wandb' or 'tensorboard', check https://huggingface.co/docs/accelerate/usage_guides/tracking for more details" - }, - ) - tracker_kwargs: Optional[dict] = field( - default_factory=dict, - metadata={"help": "Keyword arguments for the tracker (e.g. 
wandb_project)"}, - ) - accelerator_kwargs: Optional[dict] = field( - default_factory=dict, - metadata={"help": "Keyword arguments for the accelerator"}, - ) - project_kwargs: Optional[dict] = field( - default_factory=dict, - metadata={"help": "Keyword arguments for the accelerator project config (e.g. `logging_dir`)"}, - ) - tracker_project_name: Optional[str] = field( - default="trl", metadata={"help": "Name of project to use for tracking"} - ) - max_grad_norm: Optional[float] = field( - default=None, metadata={"help": "Maximum gradient norm for gradient clipping"} - ) - seed: Optional[int] = field(default=0, metadata={"help": "Seed value for random generations"}) - optimize_cuda_cache: Optional[bool] = field( - default=False, - metadata={"help": "Optimize CUDA cache for slightly more memory-efficient training"}, - ) - - def to_dict(self): - output_dict = {} - for key, value in self.__dict__.items(): - output_dict[key] = value - return flatten_dict(output_dict) diff --git a/trl/trainer/iterative_sft_trainer.py b/trl/trainer/iterative_sft_trainer.py index 230a1e3cb0..915b212ac4 100644 --- a/trl/trainer/iterative_sft_trainer.py +++ b/trl/trainer/iterative_sft_trainer.py @@ -12,13 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. import warnings -from typing import List, Optional +from typing import Callable, Dict, List, Optional, Tuple, Union import torch -from accelerate import Accelerator -from accelerate.utils import ProjectConfiguration from datasets import Dataset -from torch.optim import Adam from torch.utils.data import DataLoader from transformers import ( DataCollator, @@ -26,67 +23,54 @@ DataCollatorForSeq2Seq, PreTrainedModel, PreTrainedTokenizerBase, + Trainer, + TrainingArguments, ) +from transformers.trainer_utils import EvalLoopOutput -from ..core import PPODecorators, set_seed -from ..import_utils import is_torch_greater_2_0 -from . import IterativeSFTConfig +from ..core import PPODecorators -class IterativeSFTTrainer: +class IterativeSFTTrainer(Trainer): """ The IterativeSFTTrainer can be used to finetune models with methods that requires some steps between optimization. Attributes: - **config** (`IterativeConfig`) -- Configuration object for IterativeTrainer. **model** (`PreTrainedModel`) -- Model to be optimized, either an 'AutoModelForCausalLM' or an 'AutoModelForSeq2SeqLM'. Check the documentation of `PreTrainedModel` for more details. + **args** (`transformers.TrainingArguments`): -- The arguments to use for training. **tokenizer** (`PreTrainedTokenizerBase`) -- Tokenizer to be used for encoding the data. Check the documentation of `transformers.PreTrainedTokenizer` and `transformers.PreTrainedTokenizerFast` for more details. - **optimizer** (`torch.optim.Optimizer`, *optional*) -- Optimizer to be used for training. If no optimizer is - provided, the trainer will create an Adam optimizer with the learning rate specified in the configuration - object. + **optimizers** (`Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`): -- The optimizer and scheduler to use for training. **data_collator** (Union[DataCollatorForLanguageModeling, DataCollatorForSeq2Seq], *optional*) -- Data collator to be used for training and passed along the dataloader. - **lr_scheduler** (`torch.optim.lr_scheduler`, *optional*) -- Learning rate scheduler to be used for training. + **eval_dataset** (`datasets.Dataset`): The dataset to use for evaluation. 
+ **max_length** (`int`, defaults to `None`): -- The maximum length of the input. + **truncation_mode** (`str`, defaults to `keep_end`): -- The truncation mode to use, either `keep_end` or `keep_start`. + **preprocess_logits_for_metrics** (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`): -- The function to use to preprocess the logits before computing the metrics. + **compute_metrics** (`Callable[[EvalPrediction], Dict]`, *optional*): -- The function to use to compute the metrics. Must take a `EvalPrediction` and return a dictionary string to metric values. + **optimize_cuda_cache** (`bool`, *optional*, defaults to `False`) -- Optimize CUDA cache for slightly more memory-efficient training """ def __init__( self, - config: IterativeSFTConfig = None, model: PreTrainedModel = None, + args: TrainingArguments = None, tokenizer: PreTrainedTokenizerBase = None, - optimizer: Optional[torch.optim.Optimizer] = None, + optimizers: Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = ( + None, + None, + ), data_collator: Optional[DataCollator] = None, - lr_scheduler: Optional[torch.optim.lr_scheduler._LRScheduler] = None, + eval_dataset: Optional[Union[Dataset, Dict[str, Dataset]]] = None, + max_length: Optional[int] = None, + truncation_mode: Optional[str] = "keep_end", + preprocess_logits_for_metrics: Optional[Callable[[torch.Tensor, torch.Tensor], torch.Tensor]] = None, + compute_metrics: Optional[Callable[[EvalLoopOutput], Dict]] = None, + optimize_cuda_cache: Optional[bool] = False, ): - """ - Initialize IterativeSFTTrainer. - - Args: - config (`IterativeConfig`): - Configuration object for IterativeTrainer. - model (`PreTrainedModel`): - Hugging Face transformer model. - tokenizer (`transformers.PreTrainedTokenizerBase`): - Hugging Face tokenizer. - optimizer (Optional[`torch.optim.Optimizer`]): - Optimizer used for training. If `None`, `Adam` is used as default. - data_collator (Optional['DataCollator']): - Data collator function. - lr_scheduler (Optional[`torch.optim.lr_scheduler`]): - Learning rate scheduler used for training. - """ - - self.config = config - - # initial seed for reproducible experiments - set_seed(config.seed) - # Step 0: check positional arguments validity - if not isinstance(config, IterativeSFTConfig): - raise ValueError(f"config must be a IterativeConfig, got {type(config)}") if not isinstance(tokenizer, (PreTrainedTokenizerBase)): raise ValueError( f"tokenizer must be a PreTrainedTokenizerBase like a PreTrainedTokenizer or a PreTrainedTokenizerFast, got {type(tokenizer)}" @@ -99,26 +83,8 @@ def __init__( "Please make sure that this is intended." 
) - # Step 1: Initialize Accelerator - self.accelerator = Accelerator( - log_with=config.log_with, - gradient_accumulation_steps=config.gradient_accumulation_steps, - project_config=ProjectConfiguration(**config.project_kwargs), - **config.accelerator_kwargs, - ) - - is_using_tensorboard = config.log_with is not None and config.log_with == "tensorboard" - - self.accelerator.init_trackers( - config.tracker_project_name, - config=dict(trl_ppo_trainer_config=config.to_dict()) if not is_using_tensorboard else config.to_dict(), - init_kwargs=config.tracker_kwargs, - ) - - self.model = model - self.model_params = filter(lambda p: p.requires_grad, self.model.parameters()) - self.is_encoder_decoder = hasattr(self.model, "is_encoder_decoder") - self.is_peft_model = getattr(self.model, "is_peft_model", False) + self.is_encoder_decoder = hasattr(model, "is_encoder_decoder") + self.is_peft_model = getattr(model, "is_peft_model", False) self.tokenizer = tokenizer @@ -128,52 +94,34 @@ def __init__( "No data collator is provided. Using 'DataCollatorForSeq2Seq' with" "'labels_pad_token_id' set to '-100' and 'pad_to_multiple_of' set to 8." ) - self.data_collator = DataCollatorForSeq2Seq( - tokenizer, model=self.model, label_pad_token_id=-100, pad_to_multiple_of=8 - ) + self.data_collator = DataCollatorForSeq2Seq(tokenizer, label_pad_token_id=-100, pad_to_multiple_of=8) else: warnings.warn("No data collator is provided. Using 'DataCollatorForLanguageModeling'") self.data_collator = DataCollatorForLanguageModeling(self.tokenizer, mlm=False) else: self.data_collator = data_collator - if optimizer is None: - self.optimizer = Adam( - filter(lambda p: p.requires_grad, self.model.parameters()), - lr=self.config.learning_rate, - ) - else: - self.optimizer = optimizer - - self.lr_scheduler = lr_scheduler - if self.lr_scheduler is not None: - lr_scheduler_class = ( - torch.optim.lr_scheduler._LRScheduler - if not is_torch_greater_2_0() - else torch.optim.lr_scheduler.LRScheduler - ) - - if not isinstance(self.lr_scheduler, lr_scheduler_class): - raise ValueError( - "lr_scheduler must be a torch.optim.lr_scheduler._LRScheduler or torch.optim.lr_scheduler.LRScheduler (for torch >= 2.0)" - ) - - (self.model, self.optimizer, self.data_collator, self.lr_scheduler) = self.accelerator.prepare( - self.model, self.optimizer, self.data_collator, self.lr_scheduler + self.max_length = max_length + self.truncation_mode = truncation_mode + self.optimize_cuda_cache = optimize_cuda_cache + + super().__init__( + model=model, + args=args, + data_collator=data_collator, + eval_dataset=eval_dataset, + tokenizer=tokenizer, + compute_metrics=compute_metrics, + optimizers=optimizers, + preprocess_logits_for_metrics=preprocess_logits_for_metrics, ) - self.is_distributed = self.accelerator.distributed_type == "MULTI_GPU" - - # post process for PP - if not getattr(self.model, "is_sequential_parallel", False): - self.current_device = self.accelerator.device - else: - self.current_device = torch.device("cuda:0") - - # init the current step - self.current_step = 0 + if not hasattr(self, "accelerator"): + raise AttributeError( + "Your `Trainer` does not have an `accelerator` object. Consider upgrading `transformers`." 
+ ) - PPODecorators.optimize_cuda_cache = self.config.optimize_cuda_cache + PPODecorators.optimize_cuda_cache = self.optimize_cuda_cache def prepare_model_inputs(self, input_ids: torch.Tensor, attention_mask: torch.Tensor, labels: torch.Tensor): if attention_mask is None: @@ -189,11 +137,21 @@ def prepare_model_inputs(self, input_ids: torch.Tensor, attention_mask: torch.Te input_data.pop("decoder_input_ids", None) # This is directly computed inside the model + input_data["labels"][input_data["labels"] == self.tokenizer.pad_token_id] = -100 + else: input_data = self.data_collator( [{"input_ids": ids, "attention_mask": att} for ids, att in zip(input_ids, attention_mask)] ).to(self.model.device) + # truncate in case the user has provided input_ids, attention_mask and labels + if self.truncation_mode == "keep_start": + input_data = {k: v[: self.max_length] for k, v in input_data.items()} + elif self.truncation_mode == "keep_end": + input_data = {k: v[-self.max_length :] for k, v in input_data.items()} + else: + raise ValueError(f"Unknown truncation mode: {self.truncation_mode}") + return input_data def compute_loss(self, model, inputs, return_outputs=False): @@ -223,7 +181,6 @@ def _step_safety_checker( texts: List[str], texts_labels: List[str], ): - """ Check if the input data is valid for training. @@ -270,10 +227,17 @@ def _step_safety_checker( return input_ids, attention_mask, labels, texts, texts_labels @PPODecorators.empty_cuda_cache() - def step(self, **kwargs): + def step( + self, + input_ids: Optional[List[torch.LongTensor]] = None, + attention_mask: Optional[List[torch.LongTensor]] = None, + labels: Optional[List[torch.LongTensor]] = None, + texts: Optional[List[str]] = None, + texts_labels: Optional[List[str]] = None, + ): """ Run an optimisation step given a list of input_ids, attention_mask, and labels or a list of text and text_labels. - Keyword Args: + Args: input_ids (List[`torch.LongTensor`]): List of tensors containing the input_ids (if not provided, text will be used) attention_mask (List[`torch.LongTensor`], , *optional*): @@ -290,12 +254,6 @@ def step(self, **kwargs): self.model.train() - input_ids = kwargs.get("input_ids", None) - attention_mask = kwargs.get("attention_mask", None) - labels = kwargs.get("labels", None) - texts = kwargs.get("texts", None) - texts_labels = kwargs.get("texts_labels", None) - if input_ids is None and texts is None: raise ValueError("Step should include `input_ids` or `texts` as keyword arguments.") elif input_ids is not None and texts is not None: @@ -313,9 +271,16 @@ def step(self, **kwargs): ) if texts is not None: - input_ids = [self.tokenizer(text, return_tensors="pt")["input_ids"][0] for text in texts] + model_inputs = self.tokenizer( + texts, max_length=self.max_length, truncation=True, padding=True, return_tensors="pt" + ) + + input_ids, attention_mask = model_inputs["input_ids"], model_inputs["attention_mask"] + if texts_labels is not None: - labels = [self.tokenizer(text_labels, return_tensors="pt")["input_ids"][0] for text_labels in texts_labels] + labels = self.tokenizer( + texts, max_length=self.max_length, truncation=True, padding=True, return_tensors="pt" + )["input_ids"] if labels is None: warnings.warn("No labels are provided. 
Setting labels to input_ids") @@ -340,7 +305,7 @@ def collator(data): step_dataloader = DataLoader( batch_data, - batch_size=self.config.step_batch_size, + batch_size=self.args.per_device_train_batch_size, shuffle=True, collate_fn=collator, ) @@ -354,6 +319,12 @@ def collator(data): self.accelerator.backward(loss) + if self.accelerator.sync_gradients and self.args.max_grad_norm is not None: + self.accelerator.clip_grad_norm_( + self.model.parameters(), + self.args.max_grad_norm, + ) + self.optimizer.step() self.optimizer.zero_grad() if self.lr_scheduler is not None: From 57622f871ab233c87e7d714c3133709718af6fb9 Mon Sep 17 00:00:00 2001 From: Gaetan LOPEZ Date: Tue, 31 Oct 2023 21:43:30 -0400 Subject: [PATCH 22/38] remove iterative config from tests --- tests/test_iterative_sft_trainer.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/test_iterative_sft_trainer.py b/tests/test_iterative_sft_trainer.py index 41c73065d6..2d9e8058bb 100644 --- a/tests/test_iterative_sft_trainer.py +++ b/tests/test_iterative_sft_trainer.py @@ -18,7 +18,7 @@ from parameterized import parameterized from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer -from trl import IterativeSFTConfig, IterativeSFTTrainer +from trl import IterativeSFTTrainer class IterativeTrainerTester(unittest.TestCase): @@ -57,7 +57,6 @@ def _init_textual_dummy_dataset(self): def setUp(self): # initialize trainer - self.iterative_config = IterativeSFTConfig(step_batch_size=2, log_with=None) self.model.train() return super().setUp() @@ -66,7 +65,7 @@ def setUp(self): ["gpt2", "tensor"], ["gpt2", "text"], ["t5", "tensor"], - ["t5", "text"], + # ["t5", "text"], ] ) def test_iterative_step_from_tensor(self, model_name, input_name): @@ -92,7 +91,7 @@ def test_iterative_step_from_tensor(self, model_name, input_name): model = self.t5_model tokenizer = self.t5_tokenizer - iterative_trainer = IterativeSFTTrainer(config=self.iterative_config, model=model, tokenizer=tokenizer) + iterative_trainer = IterativeSFTTrainer(model=model, tokenizer=tokenizer) iterative_trainer.step(**inputs) From c21091b6c52f560916fd44c58731438cecaaa149 Mon Sep 17 00:00:00 2001 From: Gaetan LOPEZ Date: Tue, 31 Oct 2023 21:44:29 -0400 Subject: [PATCH 23/38] remove iterative config from init --- trl/__init__.py | 1 - trl/trainer/__init__.py | 1 - 2 files changed, 2 deletions(-) diff --git a/trl/__init__.py b/trl/__init__.py index 16388fd398..d28e29e9c8 100644 --- a/trl/__init__.py +++ b/trl/__init__.py @@ -15,7 +15,6 @@ from .trainer import ( DataCollatorForCompletionOnlyLM, DPOTrainer, - IterativeSFTConfig, IterativeSFTTrainer, PPOConfig, PPOTrainer, diff --git a/trl/trainer/__init__.py b/trl/trainer/__init__.py index 9cf852144d..e81705fbc2 100644 --- a/trl/trainer/__init__.py +++ b/trl/trainer/__init__.py @@ -36,7 +36,6 @@ from .ddpo_trainer import DDPOTrainer from .dpo_trainer import DPOTrainer -from .iterative_sft_config import IterativeSFTConfig from .iterative_sft_trainer import IterativeSFTTrainer from .ppo_config import PPOConfig from .ppo_trainer import PPOTrainer From b4600fca033f0cf9c5886c7d5a6e582b0476d7ad Mon Sep 17 00:00:00 2001 From: Gaetan LOPEZ Date: Tue, 31 Oct 2023 21:47:20 -0400 Subject: [PATCH 24/38] fix peft model --- trl/trainer/iterative_sft_trainer.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/trl/trainer/iterative_sft_trainer.py b/trl/trainer/iterative_sft_trainer.py index 915b212ac4..f92f9b3371 100644 --- a/trl/trainer/iterative_sft_trainer.py +++ 
b/trl/trainer/iterative_sft_trainer.py @@ -29,6 +29,11 @@ from transformers.trainer_utils import EvalLoopOutput from ..core import PPODecorators +from ..import_utils import is_peft_available + + +if is_peft_available(): + from peft import PeftModel class IterativeSFTTrainer(Trainer): @@ -84,7 +89,7 @@ def __init__( ) self.is_encoder_decoder = hasattr(model, "is_encoder_decoder") - self.is_peft_model = getattr(model, "is_peft_model", False) + self.is_peft_model = is_peft_available() and isinstance(self.model, PeftModel) self.tokenizer = tokenizer From 4121cbe87c5712abb7652b754479a00907568723 Mon Sep 17 00:00:00 2001 From: Gaetan LOPEZ Date: Tue, 31 Oct 2023 22:03:40 -0400 Subject: [PATCH 25/38] change truncation side based on truncation_mode --- trl/trainer/iterative_sft_trainer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/trl/trainer/iterative_sft_trainer.py b/trl/trainer/iterative_sft_trainer.py index f92f9b3371..890fbd17e4 100644 --- a/trl/trainer/iterative_sft_trainer.py +++ b/trl/trainer/iterative_sft_trainer.py @@ -121,6 +121,8 @@ def __init__( preprocess_logits_for_metrics=preprocess_logits_for_metrics, ) + self.tokenizer.truncation_side = "left" if self.truncation_mode == "keep_end" else "right" + if not hasattr(self, "accelerator"): raise AttributeError( "Your `Trainer` does not have an `accelerator` object. Consider upgrading `transformers`." From 441b05c1c1c4be849897665a4cdf59b8218aca38 Mon Sep 17 00:00:00 2001 From: Gaetan LOPEZ Date: Tue, 31 Oct 2023 22:04:19 -0400 Subject: [PATCH 26/38] removed iterativeconfig autodoc --- docs/source/iterative_sft_trainer.mdx | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/source/iterative_sft_trainer.mdx b/docs/source/iterative_sft_trainer.mdx index 908d2a9013..4c86809d75 100644 --- a/docs/source/iterative_sft_trainer.mdx +++ b/docs/source/iterative_sft_trainer.mdx @@ -70,5 +70,3 @@ config = IterativeSFTConfig( ## IterativeTrainer [[autodoc]] IterativeSFTTrainer - -[[autodoc]] IterativeSFTConfig From 3b28163938dcaacdbc78bb224e05bcaf39f36c10 Mon Sep 17 00:00:00 2001 From: Gaetan LOPEZ Date: Tue, 31 Oct 2023 22:07:01 -0400 Subject: [PATCH 27/38] fixed typo in trainer.mdx --- docs/source/trainer.mdx | 4 ---- 1 file changed, 4 deletions(-) diff --git a/docs/source/trainer.mdx b/docs/source/trainer.mdx index 599b602016..0d2550a6b1 100644 --- a/docs/source/trainer.mdx +++ b/docs/source/trainer.mdx @@ -40,10 +40,6 @@ We also support a `RewardTrainer` that can be used to train a reward model. [[autodoc]] IterativeSFTTrainer -## IterativeSFTConfig - -[[autodoc]] IterativeSFTConfig - ## set_seed [[autodoc]] set_seed From 3907102bf3ab0c026ba1ba6274cfb685405316e2 Mon Sep 17 00:00:00 2001 From: Gaetan LOPEZ Date: Tue, 31 Oct 2023 22:08:51 -0400 Subject: [PATCH 28/38] remove mention of iterative config in docs --- docs/source/iterative_sft_trainer.mdx | 26 ++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/docs/source/iterative_sft_trainer.mdx b/docs/source/iterative_sft_trainer.mdx index 4c86809d75..a6eaf5c98f 100644 --- a/docs/source/iterative_sft_trainer.mdx +++ b/docs/source/iterative_sft_trainer.mdx @@ -1,25 +1,19 @@ -# Iterative Supervised Fine-tuning Trainer +# Iterative Trainer Iterative fine-tuning is a training method that enables to perform custom actions (generation and filtering for example) between optimization steps. In TRL we provide an easy-to-use API to fine-tune your models in an iterative way in just a few lines of code. 
## Usage -To get started quickly, instantiate an instance of the class with an 'IterativeSFTConfig', a model, and a tokenizer. +To get started quickly, instantiate an instance a model, and a tokenizer. ```python model = AutoModelForCausalLM.from_pretrained(model_name) tokenizer = AutoTokenizer.from_pretrained(model_name) -tokenizer.pad_token = tokenizer.eos_token - -config = IterativeSFTConfig( - model_name=model_name, - log_with=log_with, - project_kwargs={"logging_dir":logging_dir} -) +if tokenizer.pad_token is None: + tokenizer.pad_token = tokenizer.eos_token trainer = IterativeSFTTrainer( - config, model, tokenizer ) @@ -54,18 +48,6 @@ trainer.step(**inputs) ``` For causal language models, labels will automatically be created from input_ids or from texts. When using sequence to sequence models you will have to provide your own labels or text_labels. -The default step batch size is 32, but you can change it at the time of instance initialization of the 'IterativeSFTConfig' like so - -```python - -config = IterativeSFTConfig( - model_name=model_name, - step_batch_size=step_batch_size, - log_with=log_with, - project_kwargs={"logging_dir":logging_dir} -) - -``` ## IterativeTrainer From e31c3a5e34ca47e4135b75b752c57afda3a5d310 Mon Sep 17 00:00:00 2001 From: Gaetan LOPEZ Date: Tue, 31 Oct 2023 22:22:34 -0400 Subject: [PATCH 29/38] make sure optimizer and scheduler are created --- trl/trainer/iterative_sft_trainer.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/trl/trainer/iterative_sft_trainer.py b/trl/trainer/iterative_sft_trainer.py index 890fbd17e4..9f05061744 100644 --- a/trl/trainer/iterative_sft_trainer.py +++ b/trl/trainer/iterative_sft_trainer.py @@ -55,7 +55,7 @@ class IterativeSFTTrainer(Trainer): **truncation_mode** (`str`, defaults to `keep_end`): -- The truncation mode to use, either `keep_end` or `keep_start`. **preprocess_logits_for_metrics** (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`): -- The function to use to preprocess the logits before computing the metrics. **compute_metrics** (`Callable[[EvalPrediction], Dict]`, *optional*): -- The function to use to compute the metrics. Must take a `EvalPrediction` and return a dictionary string to metric values. - **optimize_cuda_cache** (`bool`, *optional*, defaults to `False`) -- Optimize CUDA cache for slightly more memory-efficient training + **optimize_cuda_cache** (`bool`, *optional*, defaults to `False`) -- Optimize CUDA cache for slightly more memory-efficient training. """ def __init__( @@ -87,6 +87,10 @@ def __init__( f"The current model class {type(model)} is not compatible with `.generate()`" "Please make sure that this is intended." 
) + if optimizers[1] is None and args.max_steps == -1: + raise ValueError( + f"When no scheduler is provided, you need to set the total number of training steps to perform `max_steps`" + ) self.is_encoder_decoder = hasattr(model, "is_encoder_decoder") self.is_peft_model = is_peft_available() and isinstance(self.model, PeftModel) @@ -121,6 +125,13 @@ def __init__( preprocess_logits_for_metrics=preprocess_logits_for_metrics, ) + self.optimizer, self.lr_scheduler = optimizers + + if self.optimizer is None: + self.optimizer = self.create_optimizer() + if self.lr_scheduler is None: + self.lr_scheduler = self.create_scheduler(self.args.max_steps, optimizer=self.optimizer) + self.tokenizer.truncation_side = "left" if self.truncation_mode == "keep_end" else "right" if not hasattr(self, "accelerator"): From c4cd798097225ec89e638157e93fa5c9e0d27495 Mon Sep 17 00:00:00 2001 From: Gaetan LOPEZ Date: Tue, 31 Oct 2023 22:24:35 -0400 Subject: [PATCH 30/38] adding max_steps to test --- tests/test_iterative_sft_trainer.py | 5 +++-- trl/trainer/iterative_sft_trainer.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/test_iterative_sft_trainer.py b/tests/test_iterative_sft_trainer.py index 2d9e8058bb..ad9c5fea5a 100644 --- a/tests/test_iterative_sft_trainer.py +++ b/tests/test_iterative_sft_trainer.py @@ -16,7 +16,7 @@ import torch from datasets import Dataset from parameterized import parameterized -from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer +from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer, TrainingArguments from trl import IterativeSFTTrainer @@ -91,7 +91,8 @@ def test_iterative_step_from_tensor(self, model_name, input_name): model = self.t5_model tokenizer = self.t5_tokenizer - iterative_trainer = IterativeSFTTrainer(model=model, tokenizer=tokenizer) + args = TrainingArguments(max_steps=2) + iterative_trainer = IterativeSFTTrainer(model=model, args=args, tokenizer=tokenizer) iterative_trainer.step(**inputs) diff --git a/trl/trainer/iterative_sft_trainer.py b/trl/trainer/iterative_sft_trainer.py index 9f05061744..5500cc3a00 100644 --- a/trl/trainer/iterative_sft_trainer.py +++ b/trl/trainer/iterative_sft_trainer.py @@ -89,7 +89,7 @@ def __init__( ) if optimizers[1] is None and args.max_steps == -1: raise ValueError( - f"When no scheduler is provided, you need to set the total number of training steps to perform `max_steps`" + "When no scheduler is provided, you need to set the total number of training steps to perform `max_steps`" ) self.is_encoder_decoder = hasattr(model, "is_encoder_decoder") From 78d58798944dfd2ec2aa5ceffffef0c02ed40d4b Mon Sep 17 00:00:00 2001 From: Gaetan LOPEZ Date: Tue, 31 Oct 2023 22:28:42 -0400 Subject: [PATCH 31/38] remove log_stats fn --- trl/trainer/iterative_sft_trainer.py | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/trl/trainer/iterative_sft_trainer.py b/trl/trainer/iterative_sft_trainer.py index 5500cc3a00..030708e119 100644 --- a/trl/trainer/iterative_sft_trainer.py +++ b/trl/trainer/iterative_sft_trainer.py @@ -352,31 +352,3 @@ def collator(data): all_stats.append(dict(loss=dict(total=loss.detach()))) return all_stats - - def log_stats( - self, - stats: dict, - ): - """ - A function that logs all the training stats. - - Args: - stats (dict[str, Any]): - A dictionary of training stats. 
- """ - # Log only if we are in the main process - if self.accelerator.is_main_process: - logs = {} - - logs.update(stats) - - # manually cast in fp32 for bf16 torch tensors - for k, v in logs.items(): - if isinstance(v, torch.Tensor) and v.dtype == torch.bfloat16: - logs[k] = v.float() - - if self.config.log_with == "tensorboard": - # update the current step - self.current_step += 1 - - self.accelerator.log(logs, step=self.current_step if self.config.log_with == "tensorboard" else None) From 5b2da4e3de981cb94414d6a833dcbf11ca17a087 Mon Sep 17 00:00:00 2001 From: Gaetan LOPEZ Date: Tue, 31 Oct 2023 22:39:40 -0400 Subject: [PATCH 32/38] remove compute loss --- trl/trainer/iterative_sft_trainer.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/trl/trainer/iterative_sft_trainer.py b/trl/trainer/iterative_sft_trainer.py index 030708e119..a7a1bce967 100644 --- a/trl/trainer/iterative_sft_trainer.py +++ b/trl/trainer/iterative_sft_trainer.py @@ -172,25 +172,6 @@ def prepare_model_inputs(self, input_ids: torch.Tensor, attention_mask: torch.Te return input_data - def compute_loss(self, model, inputs, return_outputs=False): - """ - Loss is computed as in the HuggingFace Trainer. - - Subclass and override for custom behavior. - """ - - outputs = model(**inputs) - - if isinstance(outputs, dict) and "loss" not in outputs: - raise ValueError( - "The model did not return a loss from the inputs, only the following keys: " - f"{','.join(outputs.keys())}. For reference, the inputs it received are {','.join(inputs.keys())}." - ) - # We don't use .loss here since the model may return tuples instead of ModelOutput. - loss = outputs["loss"] if isinstance(outputs, dict) else outputs[0] - - return (loss, outputs) if return_outputs else loss - @staticmethod def _step_safety_checker( input_ids: List[torch.LongTensor], From 5de3ddf4cb0cdd2b4a89985d1221f8cd63a33abe Mon Sep 17 00:00:00 2001 From: Gaetan LOPEZ Date: Wed, 1 Nov 2023 09:26:26 -0400 Subject: [PATCH 33/38] fixing encoder decoder detection --- trl/trainer/iterative_sft_trainer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/trl/trainer/iterative_sft_trainer.py b/trl/trainer/iterative_sft_trainer.py index a7a1bce967..749b75c54c 100644 --- a/trl/trainer/iterative_sft_trainer.py +++ b/trl/trainer/iterative_sft_trainer.py @@ -92,7 +92,7 @@ def __init__( "When no scheduler is provided, you need to set the total number of training steps to perform `max_steps`" ) - self.is_encoder_decoder = hasattr(model, "is_encoder_decoder") + self.is_encoder_decoder = getattr(model.config, "is_encoder_decoder", False) self.is_peft_model = is_peft_available() and isinstance(self.model, PeftModel) self.tokenizer = tokenizer From 4e68b76e163d648f67958f3fcb4529c856d649d8 Mon Sep 17 00:00:00 2001 From: Gaetan LOPEZ Date: Wed, 1 Nov 2023 10:04:11 -0400 Subject: [PATCH 34/38] fix PPODecorator --- trl/trainer/iterative_sft_trainer.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/trl/trainer/iterative_sft_trainer.py b/trl/trainer/iterative_sft_trainer.py index 749b75c54c..d2692ea2e4 100644 --- a/trl/trainer/iterative_sft_trainer.py +++ b/trl/trainer/iterative_sft_trainer.py @@ -55,7 +55,7 @@ class IterativeSFTTrainer(Trainer): **truncation_mode** (`str`, defaults to `keep_end`): -- The truncation mode to use, either `keep_end` or `keep_start`. **preprocess_logits_for_metrics** (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`): -- The function to use to preprocess the logits before computing the metrics. 
**compute_metrics** (`Callable[[EvalPrediction], Dict]`, *optional*): -- The function to use to compute the metrics. Must take a `EvalPrediction` and return a dictionary string to metric values. - **optimize_cuda_cache** (`bool`, *optional*, defaults to `False`) -- Optimize CUDA cache for slightly more memory-efficient training. + **optimize_device_cache ** (`bool`, *optional*, defaults to `False`) -- Optimize CUDA cache for slightly more memory-efficient training. """ def __init__( @@ -73,7 +73,7 @@ def __init__( truncation_mode: Optional[str] = "keep_end", preprocess_logits_for_metrics: Optional[Callable[[torch.Tensor, torch.Tensor], torch.Tensor]] = None, compute_metrics: Optional[Callable[[EvalLoopOutput], Dict]] = None, - optimize_cuda_cache: Optional[bool] = False, + optimize_device_cache : Optional[bool] = False, ): # Step 0: check positional arguments validity if not isinstance(tokenizer, (PreTrainedTokenizerBase)): @@ -112,7 +112,7 @@ def __init__( self.max_length = max_length self.truncation_mode = truncation_mode - self.optimize_cuda_cache = optimize_cuda_cache + self.optimize_device_cache = optimize_device_cache super().__init__( model=model, @@ -139,7 +139,7 @@ def __init__( "Your `Trainer` does not have an `accelerator` object. Consider upgrading `transformers`." ) - PPODecorators.optimize_cuda_cache = self.optimize_cuda_cache + PPODecorators.optimize_device_cache = self.optimize_device_cache def prepare_model_inputs(self, input_ids: torch.Tensor, attention_mask: torch.Tensor, labels: torch.Tensor): if attention_mask is None: @@ -225,7 +225,7 @@ def _step_safety_checker( return input_ids, attention_mask, labels, texts, texts_labels - @PPODecorators.empty_cuda_cache() + @PPODecorators.empty_device_cache() def step( self, input_ids: Optional[List[torch.LongTensor]] = None, From 122494ef3e398e3b7822b50df2ecb01b14a430cd Mon Sep 17 00:00:00 2001 From: Gaetan LOPEZ Date: Wed, 1 Nov 2023 12:14:13 -0400 Subject: [PATCH 35/38] run precommit --- trl/trainer/iterative_sft_trainer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/trl/trainer/iterative_sft_trainer.py b/trl/trainer/iterative_sft_trainer.py index d2692ea2e4..74c54bcd73 100644 --- a/trl/trainer/iterative_sft_trainer.py +++ b/trl/trainer/iterative_sft_trainer.py @@ -73,7 +73,7 @@ def __init__( truncation_mode: Optional[str] = "keep_end", preprocess_logits_for_metrics: Optional[Callable[[torch.Tensor, torch.Tensor], torch.Tensor]] = None, compute_metrics: Optional[Callable[[EvalLoopOutput], Dict]] = None, - optimize_device_cache : Optional[bool] = False, + optimize_device_cache: Optional[bool] = False, ): # Step 0: check positional arguments validity if not isinstance(tokenizer, (PreTrainedTokenizerBase)): @@ -112,7 +112,7 @@ def __init__( self.max_length = max_length self.truncation_mode = truncation_mode - self.optimize_device_cache = optimize_device_cache + self.optimize_device_cache = optimize_device_cache super().__init__( model=model, @@ -139,7 +139,7 @@ def __init__( "Your `Trainer` does not have an `accelerator` object. Consider upgrading `transformers`." 
         )
-        PPODecorators.optimize_device_cache = self.optimize_device_cache
+        PPODecorators.optimize_device_cache = self.optimize_device_cache
 
     def prepare_model_inputs(self, input_ids: torch.Tensor, attention_mask: torch.Tensor, labels: torch.Tensor):
         if attention_mask is None:

From 71c28fd336bab86c9a40c849e8ee433bdfec62e5 Mon Sep 17 00:00:00 2001
From: Gaetan LOPEZ
Date: Wed, 1 Nov 2023 21:51:07 -0400
Subject: [PATCH 36/38] fix testing

---
 tests/test_iterative_sft_trainer.py | 58 ++++++++++++++++-------------
 1 file changed, 32 insertions(+), 26 deletions(-)

diff --git a/tests/test_iterative_sft_trainer.py b/tests/test_iterative_sft_trainer.py
index ad9c5fea5a..70d5640795 100644
--- a/tests/test_iterative_sft_trainer.py
+++ b/tests/test_iterative_sft_trainer.py
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import tempfile
 import unittest
 
 import torch
@@ -65,36 +66,41 @@ def setUp(self):
             ["gpt2", "tensor"],
             ["gpt2", "text"],
             ["t5", "tensor"],
-            # ["t5", "text"],
+            ["t5", "text"],
         ]
     )
     def test_iterative_step_from_tensor(self, model_name, input_name):
-        # initialize dataset
-        if input_name == "tensor":
-            dummy_dataset = self._init_tensor_dummy_dataset()
-            inputs = {
-                "input_ids": dummy_dataset["input_ids"],
-                "attention_mask": dummy_dataset["attention_mask"],
-                "labels": dummy_dataset["labels"],
-            }
-        else:
-            dummy_dataset = self._init_textual_dummy_dataset()
-            inputs = {
-                "texts": dummy_dataset["texts"],
-                "texts_labels": dummy_dataset["texts_labels"],
-            }
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            # initialize dataset
+            if input_name == "tensor":
+                dummy_dataset = self._init_tensor_dummy_dataset()
+                inputs = {
+                    "input_ids": dummy_dataset["input_ids"],
+                    "attention_mask": dummy_dataset["attention_mask"],
+                    "labels": dummy_dataset["labels"],
+                }
+            else:
+                dummy_dataset = self._init_textual_dummy_dataset()
+                inputs = {
+                    "texts": dummy_dataset["texts"],
+                    "texts_labels": dummy_dataset["texts_labels"],
+                }
 
-        if model_name == "gpt2":
-            model = self.model
-            tokenizer = self.tokenizer
-        else:
-            model = self.t5_model
-            tokenizer = self.t5_tokenizer
+            if model_name == "gpt2":
+                model = self.model
+                tokenizer = self.tokenizer
+            else:
+                model = self.t5_model
+                tokenizer = self.t5_tokenizer
 
-        args = TrainingArguments(max_steps=2)
-        iterative_trainer = IterativeSFTTrainer(model=model, args=args, tokenizer=tokenizer)
+            args = TrainingArguments(
+                output_dir=tmp_dir,
+                per_device_train_batch_size=2,
+                max_steps=2,
+            )
+            iterative_trainer = IterativeSFTTrainer(model=model, args=args, tokenizer=tokenizer)
 
-        iterative_trainer.step(**inputs)
+            iterative_trainer.step(**inputs)
 
-        for param in iterative_trainer.model.parameters():
-            assert param.grad is not None
+            for param in iterative_trainer.model.parameters():
+                assert param.grad is not None

From 00a89ca51d3f8fd1d29119c6b46e9ab8769367cf Mon Sep 17 00:00:00 2001
From: Gaetan LOPEZ
Date: Wed, 1 Nov 2023 21:52:04 -0400
Subject: [PATCH 37/38] fix small typos in iterative trainer

---
 trl/trainer/iterative_sft_trainer.py | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/trl/trainer/iterative_sft_trainer.py b/trl/trainer/iterative_sft_trainer.py
index 74c54bcd73..20b9b8df58 100644
--- a/trl/trainer/iterative_sft_trainer.py
+++ b/trl/trainer/iterative_sft_trainer.py
@@ -93,7 +93,7 @@ def __init__(
         )
 
         self.is_encoder_decoder = getattr(model.config, "is_encoder_decoder", False)
-        self.is_peft_model = is_peft_available() and isinstance(self.model, PeftModel)
+        self.is_peft_model = is_peft_available() and isinstance(model, PeftModel)
 
         self.tokenizer = tokenizer
 
@@ -117,7 +117,7 @@ def __init__(
         super().__init__(
             model=model,
             args=args,
-            data_collator=data_collator,
+            data_collator=self.data_collator,
             eval_dataset=eval_dataset,
             tokenizer=tokenizer,
             compute_metrics=compute_metrics,
@@ -125,12 +125,12 @@ def __init__(
             preprocess_logits_for_metrics=preprocess_logits_for_metrics,
         )
 
-        self.optimizer, self.lr_scheduler = optimizers
+        self.create_optimizer_and_scheduler(self.args.max_steps)
 
-        if self.optimizer is None:
-            self.optimizer = self.create_optimizer()
-        if self.lr_scheduler is None:
-            self.lr_scheduler = self.create_scheduler(self.args.max_steps, optimizer=self.optimizer)
+        # prepare model, optimizer and lr_scheduler
+        self.model, self.optimizer, self.lr_scheduler = self.accelerator.prepare(
+            self.model, self.optimizer, self.lr_scheduler
+        )
 
         self.tokenizer.truncation_side = "left" if self.truncation_mode == "keep_end" else "right"
 
@@ -163,12 +163,13 @@ def prepare_model_inputs(self, input_ids: torch.Tensor, attention_mask: torch.Te
         ).to(self.model.device)
 
         # truncate in case the user has provided input_ids, attention_mask and labels
-        if self.truncation_mode == "keep_start":
-            input_data = {k: v[: self.max_length] for k, v in input_data.items()}
-        elif self.truncation_mode == "keep_end":
-            input_data = {k: v[-self.max_length :] for k, v in input_data.items()}
-        else:
-            raise ValueError(f"Unknown truncation mode: {self.truncation_mode}")
+        if self.max_length is not None:
+            if self.truncation_mode == "keep_start":
+                input_data = {k: v[: self.max_length] for k, v in input_data.items()}
+            elif self.truncation_mode == "keep_end":
+                input_data = {k: v[-self.max_length :] for k, v in input_data.items()}
+            else:
+                raise ValueError(f"Unknown truncation mode: {self.truncation_mode}")
 
         return input_data
 

From aa70ca3d570e8c314b60f2ec81310a5700e198c7 Mon Sep 17 00:00:00 2001
From: Gaetan LOPEZ
Date: Wed, 1 Nov 2023 22:27:36 -0400
Subject: [PATCH 38/38] adapted function log and eval

---
 trl/trainer/iterative_sft_trainer.py | 41 ++++++++++++++++++++++++----
 1 file changed, 36 insertions(+), 5 deletions(-)

diff --git a/trl/trainer/iterative_sft_trainer.py b/trl/trainer/iterative_sft_trainer.py
index 20b9b8df58..006b02ad51 100644
--- a/trl/trainer/iterative_sft_trainer.py
+++ b/trl/trainer/iterative_sft_trainer.py
@@ -251,9 +251,12 @@ def step(
         Returns:
             `dict[str, Any]`: A summary of the training statistics
         """
-
         self.model.train()
 
+        if self.state.global_step == 0:
+            self.tr_loss = torch.tensor(0.0).to(self.args.device)
+            self._globalstep_last_logged = self.state.global_step
+
         if input_ids is None and texts is None:
             raise ValueError("Step should include `input_ids` or `texts` as keyword arguments.")
         elif input_ids is not None and texts is not None:
@@ -310,13 +313,16 @@ def collator(data):
             collate_fn=collator,
         )
 
-        all_stats = []
-
         for _, batch in enumerate(step_dataloader):
             with self.accelerator.accumulate(self.model):
                 model_inputs = {k: batch[k] for k in model_inputs_names}
                 loss = self.compute_loss(self.model, model_inputs)
 
+                if self.args.n_gpu > 1:
+                    loss = loss.mean()
+
+                tr_loss_step = loss.detach()
+
                 self.accelerator.backward(loss)
 
                 if self.accelerator.sync_gradients and self.args.max_grad_norm is not None:
@@ -330,7 +336,32 @@ def collator(data):
                 if self.lr_scheduler is not None:
                     self.lr_scheduler.step()
 
+                self.state.global_step += 1
+
                 # update stats etc
-                all_stats.append(dict(loss=dict(total=loss.detach())))
+                self.tr_loss += tr_loss_step
+
+                self._maybe_log_save_evaluate()
+
+    def _maybe_log_save_evaluate(self):
+        # check if eval is required
+        if self.args.eval_steps is not None:
+            if self.state.global_step % self.args.eval_steps == 0 and self.state.global_step != 0:
+                self.evaluate(self.eval_dataset)
+
+        # check if logging is required
+        if self.args.logging_steps is not None:
+            if self.state.global_step % self.args.logging_steps == 0 and self.state.global_step != 0:
+                logs: Dict[str, float] = {}
+
+                tr_loss_scalar = self._nested_gather(self.tr_loss).mean().item()
+
+                # reset tr_loss to zero
+                self.tr_loss -= self.tr_loss
+
+                logs["loss"] = round(tr_loss_scalar / (self.state.global_step - self._globalstep_last_logged), 4)
+                logs["learning_rate"] = self._get_learning_rate()
+
+                self._globalstep_last_logged = self.state.global_step
 
-        return all_stats
+                self.log(logs)
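
After PATCH 38, `step` no longer returns per-batch statistics: it accumulates `tr_loss` and delegates logging and evaluation to `_maybe_log_save_evaluate`, driven by `logging_steps` and `eval_steps` in `TrainingArguments`. Below is a minimal usage sketch of the `step` API as exercised by the test in PATCH 36; the checkpoint name, sample texts, token ids and output directory are illustrative placeholders, not values taken from these patches.

# Minimal usage sketch (illustrative placeholders for checkpoint, texts, ids and output_dir).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from trl import IterativeSFTTrainer

model = AutoModelForCausalLM.from_pretrained("gpt2")
tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token  # gpt2 has no pad token by default

args = TrainingArguments(
    output_dir="./iterative_sft_example",
    per_device_train_batch_size=2,
    max_steps=2,
)
trainer = IterativeSFTTrainer(model=model, args=args, tokenizer=tokenizer)

# Textual inputs: the trainer tokenizes the texts and labels itself.
trainer.step(
    texts=["hi nice to meet you", "my name is python"],
    texts_labels=["hi nice to meet you", "my name is python"],
)

# Pre-tokenized inputs: lists of 1-D tensors, mirroring the tensor dummy dataset in the test.
input_ids = [torch.tensor([101, 102, 103]), torch.tensor([104, 105])]
attention_mask = [torch.ones_like(ids) for ids in input_ids]
trainer.step(
    input_ids=input_ids,
    attention_mask=attention_mask,
    labels=[ids.clone() for ids in input_ids],
)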