
Add fp8 related changes to mistral for text-generation (huggingface#918)
Co-authored-by: Jimin Ha <[email protected]>
Co-authored-by: regisss <[email protected]>
3 people authored May 7, 2024
1 parent 7953cce commit 9f6eba3
Showing 3 changed files with 200 additions and 88 deletions.
examples/text-generation/run_lm_eval.py (2 additions, 2 deletions)
@@ -75,13 +75,13 @@ def __init__(self, tokenizer, model, args, options):
         self.options = options
         self._device = args.device
         self.model_inputs = {"use_cache": self.options.use_cache}
-        if self.model.config.model_type in ["llama", "falcon"]:
+        if self.model.config.model_type in ["llama", "mistral", "falcon"]:
             self.model_inputs.update(
                 {
                     "reuse_cache": self.options.reuse_cache,
                 }
             )
-        if self.model.config.model_type == "llama":
+        if self.model.config.model_type in ["llama", "mistral"]:
             self.model_inputs.update(
                 {
                     "attn_softmax_bf16": self.options.attn_softmax_bf16,
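
For readability, here is a minimal, self-contained sketch (not the repository's code) of the selection logic this hunk changes: Mistral now receives the same reuse_cache and attn_softmax_bf16 model inputs that were previously passed only for Llama (and, for reuse_cache, Falcon). The function name build_model_inputs and the SimpleNamespace below are illustrative stand-ins for the script's adapter class and its parsed options.

    # Illustrative sketch of the updated model_inputs selection logic shown in the diff.
    # build_model_inputs and options are hypothetical stand-ins, not names from the repo;
    # the attribute names mirror the ones visible in the diff above.
    from types import SimpleNamespace


    def build_model_inputs(model_type: str, options) -> dict:
        model_inputs = {"use_cache": options.use_cache}
        # reuse_cache is now forwarded for Mistral in addition to Llama and Falcon.
        if model_type in ["llama", "mistral", "falcon"]:
            model_inputs["reuse_cache"] = options.reuse_cache
        # attn_softmax_bf16 is now forwarded for Mistral in addition to Llama.
        if model_type in ["llama", "mistral"]:
            model_inputs["attn_softmax_bf16"] = options.attn_softmax_bf16
        return model_inputs


    options = SimpleNamespace(use_cache=True, reuse_cache=True, attn_softmax_bf16=True)
    print(build_model_inputs("mistral", options))
    # -> {'use_cache': True, 'reuse_cache': True, 'attn_softmax_bf16': True}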
