From 2f25f54ab91dd6a34ce67ae8b202e057301b2023 Mon Sep 17 00:00:00 2001 From: Samuel Larkin Date: Tue, 7 Jan 2025 15:26:28 -0500 Subject: [PATCH] =?UTF-8?q?=E2=9C=92=EF=B8=8F=20Fix=20typo=20in=20`formatt?= =?UTF-8?q?ing=5Ffunc`'s=20documentation=20in=20`ConstantLengthDataset`=20?= =?UTF-8?q?(#2549)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- trl/trainer/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/trl/trainer/utils.py b/trl/trainer/utils.py index ce46ab55c7..cda8803f3c 100644 --- a/trl/trainer/utils.py +++ b/trl/trainer/utils.py @@ -552,7 +552,7 @@ class ConstantLengthDataset(IterableDataset): Name of the field in the dataset that contains the text. Only one of `dataset_text_field` and `formatting_func` should be provided. formatting_func (`Callable`, *optional*): - Function that formats the text before tokenization. Usually it is recommended to have follows a certain + Function that formats the text before tokenization. Usually it is recommended to follow a certain pattern such as `"### Question: {question} ### Answer: {answer}"`. Only one of `dataset_text_field` and `formatting_func` should be provided. infinite (`bool`, *optional*, defaults to `False`):