
Fix: Adjust recipe to fit within QueueComputeScal HBM global memory size limit (#1722)

Co-authored-by: Yaser Afshar <[email protected]>
kalyank007 and yafshar authored Jan 31, 2025
1 parent 19f3337 commit bf23006
Showing 1 changed file with 4 additions and 0 deletions.
4 changes: 4 additions & 0 deletions optimum/habana/transformers/models/mllama/modeling_mllama.py
```diff
@@ -694,6 +694,10 @@ def forward(
         next_decoder_cache = None if isinstance(past_key_values, Cache) else ()

         for idx, decoder_layer in enumerate(self.layers):
+            if not self.training and (
+                not torch.distributed.is_initialized() or torch.distributed.get_world_size() == 1
+            ):
+                htcore.mark_step()
             if output_hidden_states:
                 all_hidden_states += (hidden_states,)
```
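The added guard calls `htcore.mark_step()` once per decoder layer, but only during single-device inference; `mark_step` flushes the accumulated HPU graph so the per-step workspace stays within the HBM global memory limit. The logic of the condition can be sketched as a standalone predicate (the helper name and boolean parameters below are illustrative, not part of optimum-habana's API):

```python
def should_mark_step(training: bool, dist_initialized: bool, world_size: int) -> bool:
    """Mirror the condition added in this commit: mark a graph step only
    when not training and running on a single device (i.e. torch.distributed
    is uninitialized, or the world size is 1)."""
    return (not training) and (not dist_initialized or world_size == 1)

# Single-device inference: the per-layer mark_step fires.
print(should_mark_step(training=False, dist_initialized=False, world_size=1))  # True
# Multi-card run: skipped, avoiding per-layer graph breaks across ranks.
print(should_mark_step(training=False, dist_initialized=True, world_size=8))   # False
# Training: skipped regardless of distributed state.
print(should_mark_step(training=True, dist_initialized=False, world_size=1))   # False
```

Keeping the call out of multi-card runs avoids forcing a graph break on every layer of every rank, where the memory pressure this fix targets does not apply in the same way.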
