Upgrade to Transformers 4.40 (huggingface#1027)
regisss authored May 31, 2024
1 parent 569580f commit dd3cc08
Showing 68 changed files with 1,720 additions and 1,117 deletions.
2 changes: 1 addition & 1 deletion Makefile
@@ -74,7 +74,7 @@ slow_tests_fsdp: test_installs
python -m pytest tests/test_fsdp_examples.py -v -s --token $(TOKEN)

slow_tests_trl: test_installs
-python -m pip install trl==0.7.8
+python -m pip install trl==0.8.6
python -m pip install peft==0.7.0
python -m pytest tests/test_trl.py -v -s -k "test_calculate_loss"

4 changes: 2 additions & 2 deletions examples/audio-classification/run_audio_classification.py
@@ -46,8 +46,8 @@ def check_optimum_habana_min_version(*a, **b):
logger = logging.getLogger(__name__)

# Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
-check_min_version("4.38.0")
-check_optimum_habana_min_version("1.10.0")
+check_min_version("4.40.0")
+check_optimum_habana_min_version("1.11.0")

require_version("datasets>=1.14.0", "To fix: pip install -r examples/pytorch/audio-classification/requirements.txt")

4 changes: 2 additions & 2 deletions examples/contrastive-image-text/run_bridgetower.py
@@ -56,8 +56,8 @@ def check_optimum_habana_min_version(*a, **b):
logger = logging.getLogger(__name__)

# Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
-check_min_version("4.38.0")
-check_optimum_habana_min_version("1.10.0")
+check_min_version("4.40.0")
+check_optimum_habana_min_version("1.11.0")

require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/contrastive-image-text/requirements.txt")

4 changes: 2 additions & 2 deletions examples/contrastive-image-text/run_clip.py
@@ -61,8 +61,8 @@ def check_optimum_habana_min_version(*a, **b):
logger = logging.getLogger(__name__)

# Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
-check_min_version("4.38.0")
-check_optimum_habana_min_version("1.10.0")
+check_min_version("4.40.0")
+check_optimum_habana_min_version("1.11.0")

require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/contrastive-image-text/requirements.txt")

4 changes: 2 additions & 2 deletions examples/image-classification/run_image_classification.py
@@ -63,8 +63,8 @@ def check_optimum_habana_min_version(*a, **b):
logger = logging.getLogger(__name__)

# Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
-check_min_version("4.38.0")
-check_optimum_habana_min_version("1.10.0")
+check_min_version("4.40.0")
+check_optimum_habana_min_version("1.11.0")

require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/image-classification/requirements.txt")

2 changes: 1 addition & 1 deletion examples/language-modeling/README.md
@@ -714,7 +714,7 @@ python3 ../text-generation/run_generation.py \

## Streaming

-To use the streaming dataset mode which can be very useful for large datasets, add `--streaming` with `--max_steps` specified in the command line. This is currently supported by `run_mlm.py` and `run_clm.py`.
+To use the streaming dataset mode which can be very useful for large datasets, add `--streaming` with `--max_steps` specified in the command line. This is supported by `run_mlm.py` and `run_clm.py`.

For example:
```bash
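# Illustrative sketch only: the README's actual example command is collapsed
# out of this diff, and the model, dataset and step count below are assumptions
# (Gaudi-specific flags such as --use_habana are also omitted here).
# --max_steps is required with --streaming because a streamed dataset exposes
# no length for the trainer to derive epochs from.
python run_clm.py \
    --model_name_or_path gpt2 \
    --dataset_name wikitext \
    --dataset_config_name wikitext-2-raw-v1 \
    --do_train \
    --streaming \
    --max_steps 1000 \
    --output_dir /tmp/clm_streaming
```
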
2 changes: 1 addition & 1 deletion examples/language-modeling/requirements.txt
@@ -1,5 +1,5 @@
torch >= 1.3
-datasets >= 2.4.0
+datasets >= 2.14.0
sentencepiece != 0.1.92
protobuf
evaluate

4 changes: 2 additions & 2 deletions examples/language-modeling/run_clm.py
@@ -62,8 +62,8 @@ def check_optimum_habana_min_version(*a, **b):
logger = logging.getLogger(__name__)

# Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
-check_min_version("4.38.0")
-check_optimum_habana_min_version("1.10.0")
+check_min_version("4.40.0")
+check_optimum_habana_min_version("1.11.0")

require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")

4 changes: 2 additions & 2 deletions examples/language-modeling/run_mlm.py
@@ -61,8 +61,8 @@ def check_optimum_habana_min_version(*a, **b):
logger = logging.getLogger(__name__)

# Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
-check_min_version("4.38.0")
-check_optimum_habana_min_version("1.10.0")
+check_min_version("4.40.0")
+check_optimum_habana_min_version("1.11.0")

require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")

2 changes: 1 addition & 1 deletion examples/protein-folding/run_esmfold.py
@@ -40,7 +40,7 @@ def check_optimum_habana_min_version(*a, **b):


# Will error if the minimal version of Optimum Habana is not installed. Remove at your own risks.
-check_optimum_habana_min_version("1.10.0")
+check_optimum_habana_min_version("1.11.0")


def convert_outputs_to_pdb(outputs):

4 changes: 2 additions & 2 deletions examples/question-answering/run_qa.py
@@ -60,8 +60,8 @@ def check_optimum_habana_min_version(*a, **b):
logger = logging.getLogger(__name__)

# Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
-check_min_version("4.38.0")
-check_optimum_habana_min_version("1.10.0")
+check_min_version("4.40.0")
+check_optimum_habana_min_version("1.11.0")

require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt")

4 changes: 2 additions & 2 deletions examples/question-answering/run_seq2seq_qa.py
@@ -56,8 +56,8 @@ def check_optimum_habana_min_version(*a, **b):
logger = logging.getLogger(__name__)

# Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
-check_min_version("4.38.0")
-check_optimum_habana_min_version("1.10.0")
+check_min_version("4.40.0")
+check_optimum_habana_min_version("1.11.0")

require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt")

4 changes: 2 additions & 2 deletions examples/speech-recognition/run_speech_recognition_ctc.py
@@ -59,8 +59,8 @@ def check_optimum_habana_min_version(*a, **b):
logger = logging.getLogger(__name__)

# Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
-check_min_version("4.38.0")
-check_optimum_habana_min_version("1.10.0")
+check_min_version("4.40.0")
+check_optimum_habana_min_version("1.11.0")

require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt")

4 changes: 2 additions & 2 deletions examples/speech-recognition/run_speech_recognition_seq2seq.py
@@ -55,8 +55,8 @@ def check_optimum_habana_min_version(*a, **b):


# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.38.0")
-check_optimum_habana_min_version("1.10.0")
+check_min_version("4.40.0")
+check_optimum_habana_min_version("1.11.0")

require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt")

2 changes: 1 addition & 1 deletion examples/stable-diffusion/text_to_image_generation.py
@@ -38,7 +38,7 @@ def check_optimum_habana_min_version(*a, **b):


# Will error if the minimal version of Optimum Habana is not installed. Remove at your own risks.
-check_optimum_habana_min_version("1.10.0")
+check_optimum_habana_min_version("1.11.0")


logger = logging.getLogger(__name__)

4 changes: 2 additions & 2 deletions examples/summarization/run_summarization.py
@@ -65,8 +65,8 @@ def check_optimum_habana_min_version(*a, **b):
logger = logging.getLogger(__name__)

# Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
-check_min_version("4.38.0")
-check_optimum_habana_min_version("1.10.0")
+check_min_version("4.40.0")
+check_optimum_habana_min_version("1.11.0")

require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/summarization/requirements.txt")

4 changes: 2 additions & 2 deletions examples/text-classification/run_glue.py
@@ -57,8 +57,8 @@ def check_optimum_habana_min_version(*a, **b):
logger = logging.getLogger(__name__)

# Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
-check_min_version("4.38.0")
-check_optimum_habana_min_version("1.10.0")
+check_min_version("4.40.0")
+check_optimum_habana_min_version("1.11.0")

require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/text-classification/requirements.txt")

4 changes: 2 additions & 2 deletions examples/translation/run_translation.py
@@ -62,8 +62,8 @@ def check_optimum_habana_min_version(*a, **b):
logger = logging.getLogger(__name__)

# Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
-check_min_version("4.38.0")
-check_optimum_habana_min_version("1.10.0")
+check_min_version("4.40.0")
+check_optimum_habana_min_version("1.11.0")

require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/translation/requirements.txt")

1 change: 1 addition & 0 deletions examples/trl/dpo.py
@@ -114,6 +114,7 @@ def get_stack_exchange_paired(
        split="train",
        cache_dir=cache_dir,
        data_dir=data_dir,
+        verification_mode="no_checks", # TODO: remove this line when Datasets v2.19.2 is released
    )
    original_columns = dataset.column_names

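For context, `verification_mode` is a regular argument of `datasets.load_dataset`; the string `"no_checks"` disables split-size and checksum verification, which is what the added line above relies on. A minimal, self-contained sketch of the same workaround (the dataset below is an arbitrary small public one chosen for illustration, not the one used by `dpo.py`):

```python
from datasets import load_dataset

# "no_checks" skips split-size/checksum verification, mirroring the
# workaround added in the hunk above.
dataset = load_dataset("imdb", split="train", verification_mode="no_checks")
print(dataset.column_names)
```
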
3 changes: 1 addition & 2 deletions examples/trl/requirements.txt
@@ -1,7 +1,6 @@
-trl == 0.7.8
+trl == 0.8.6
peft == 0.6.2
datasets
-wandb
tyro
evaluate
scikit-learn
3 changes: 3 additions & 0 deletions optimum/habana/transformers/generation/__init__.py
@@ -1,6 +1,9 @@
from .configuration_utils import GaudiGenerationConfig
from .stopping_criteria import (
+    gaudi_EosTokenCriteria_call,
    gaudi_MaxLengthCriteria_call,
    gaudi_MaxNewTokensCriteria_call,
+    gaudi_MaxTimeCriteria_call,
+    gaudi_StoppingCriteriaList_call,
)
from .utils import MODELS_OPTIMIZED_WITH_STATIC_SHAPES, GaudiGenerationMixin

71 changes: 67 additions & 4 deletions optimum/habana/transformers/generation/stopping_criteria.py
@@ -14,17 +14,31 @@
# See the License for the specific language governing permissions and
# limitations under the License.

+import time
+from typing import Union
+
import torch

from optimum.utils import logging


logger = logging.get_logger(__name__)

+# Instead of returning a tensor describing status of completeness of each sentence
+# we only return a single boolean describing the state of the batch
+# only when needs_tensor_output says so, we return array of booleans


+def create_return_const_tensor(input_ids, is_done):
+    return torch.full((input_ids.shape[0],), 1 if is_done else 0, device=input_ids.device, dtype=torch.uint8)

-def gaudi_MaxLengthCriteria_call(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:

+def gaudi_MaxLengthCriteria_call(
+    self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs
+) -> Union[torch.BoolTensor, bool]:
    token_idx = kwargs.get("token_idx", None)
    if token_idx is not None:
+        assert not kwargs["needs_tensor_output"]
        return token_idx >= self.max_length
    else:
        cur_len = input_ids.shape[-1]
@@ -35,12 +49,61 @@ def gaudi_MaxLengthCriteria_call(self, input_ids: torch.LongTensor, scores: torc
                f"maximum length ({self.max_position_embeddings}). Depending on the model, you may observe "
                "exceptions, performance degradation, or nothing at all."
            )
-        return is_done
+        return create_return_const_tensor(input_ids, is_done)


-def gaudi_MaxNewTokensCriteria_call(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
+def gaudi_MaxNewTokensCriteria_call(
+    self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs
+) -> Union[torch.BoolTensor, bool]:
    token_idx = kwargs.get("token_idx", None)
    if token_idx is not None:
+        assert not kwargs["needs_tensor_output"]
        return token_idx >= self.max_length
    else:
-        return input_ids.shape[-1] >= self.max_length
+        is_done = input_ids.shape[-1] >= self.max_length
+        return create_return_const_tensor(input_ids, is_done)


+def gaudi_MaxTimeCriteria_call(
+    self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs
+) -> Union[torch.BoolTensor, bool]:
+    is_done = time.time() - self.initial_timestamp > self.max_time
+    if kwargs["needs_tensor_output"]:
+        return create_return_const_tensor(input_ids, is_done)
+    else:
+        return is_done
+
+
+def gaudi_EosTokenCriteria_call(
+    self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs
+) -> Union[torch.BoolTensor, bool]:
+    self.eos_token_id = self.eos_token_id.to(input_ids.device)
+    is_done = torch.isin(input_ids[:, -1], self.eos_token_id)
+    if kwargs["needs_tensor_output"]:
+        return is_done.byte()
+    else:
+        return torch.all(is_done).item()
+
+
+def needs_tensor_output(token_idx, ignore_eos, eos_token_id) -> bool:
+    if token_idx is None:
+        return not ignore_eos and eos_token_id is not None
+    else:
+        # token_idx is present, so we have static shapes, so using single boolean
+        return False
+
+
+def gaudi_StoppingCriteriaList_call(
+    self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs
+) -> Union[torch.BoolTensor, bool]:
+    kwargs["needs_tensor_output"] = needs_tensor_output(
+        kwargs.get("token_idx", None), kwargs.get("ignore_eos", True), kwargs.get("eos_token_id", None)
+    )
+    is_done = (
+        torch.full((input_ids.shape[0],), 0, device=input_ids.device, dtype=torch.int8)
+        if kwargs["needs_tensor_output"]
+        else False
+    )
+    for criteria in self:
+        is_done = is_done | criteria(input_ids, scores, **kwargs)
+    return is_done
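The comment block near the top of this hunk states the contract these helpers follow: with static shapes (a `token_idx` is passed) each criterion returns a single boolean for the whole batch, and only when `needs_tensor_output` is set do they return a per-sequence array of flags that `gaudi_StoppingCriteriaList_call` ORs together. As an illustration only, here is a small self-contained sketch of that accumulation pattern in plain PyTorch; the helper names merely mirror the diff and nothing here is imported from `optimum.habana`:

```python
import torch


def batch_done_flags(input_ids: torch.Tensor, is_done: bool) -> torch.Tensor:
    # Broadcast one boolean to a per-sequence uint8 tensor, in the spirit of
    # create_return_const_tensor in the diff above.
    return torch.full((input_ids.shape[0],), int(is_done), device=input_ids.device, dtype=torch.uint8)


def combine_criteria(input_ids, criterion_results, needs_tensor_output):
    # Mirrors the accumulation loop in gaudi_StoppingCriteriaList_call:
    # start from "not done" and OR in each criterion's verdict, either
    # per-sequence (tensor mode) or batch-wide (single boolean).
    done = batch_done_flags(input_ids, False) if needs_tensor_output else False
    for result in criterion_results:
        done = done | result
    return done


ids = torch.zeros((2, 5), dtype=torch.long)  # toy batch of 2 sequences

# Tensor mode: sequence 0 is finished, sequence 1 is not.
print(combine_criteria(ids, [torch.tensor([1, 0], dtype=torch.uint8)], True))

# Static-shape mode (token_idx present): one boolean for the whole batch.
print(combine_criteria(ids, [False, True], False))
```

In tensor mode a position stays flagged once any criterion marks it done, which is what lets generation continue for the still-unfinished sequences in the batch.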