#10899: fix BERT memory overflow and layernorm shape mismatch issue
sjameelTT committed Jul 30, 2024
1 parent 29b6071 · commit 80330e1
Showing 2 changed files with 11 additions and 4 deletions.
models/demos/metal_BERT_large_11/tt/embeddings.py (13 changes: 9 additions & 4 deletions)

@@ -154,10 +154,6 @@ def __call__(
     embeddings_type=ttnn.EmbeddingsType.BINARY,
     memory_config=self.model_config["OUTPUT_EMBEDDINGS_MEMCFG"],
 )
-token_type_embeddings = ttnn.reshape(
-    token_type_embeddings,
-    [token_type_embeddings.shape[0], 1, token_type_embeddings.shape[1], token_type_embeddings.shape[2]],
-)
 token_type_ids.deallocate()

 if self.position_embedding_type == "absolute":
@@ -184,6 +180,15 @@ def __call__(
         position_embeddings_tt_tensor.shape[2],
     ],
 )
+inputs_plus_token_type_embeddings_tt_tensor = ttnn.reshape(
+    inputs_plus_token_type_embeddings_tt_tensor,
+    [
+        inputs_plus_token_type_embeddings_tt_tensor.shape[0],
+        1,
+        inputs_plus_token_type_embeddings_tt_tensor.shape[1],
+        inputs_plus_token_type_embeddings_tt_tensor.shape[2],
+    ],
+)
 # Deallocate inputs_embeds and token_type_embeddings here to avoid having to move final output
 if self.model_config["DEALLOC_INPUT_EMBEDS_AFTER_POSITION_EMBEDS"]:
     inputs_embeds.deallocate()
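The embeddings.py change moves the rank-3 to rank-4 reshape off the intermediate token_type_embeddings and onto the summed inputs_plus_token_type_embeddings_tt_tensor, inserting the size-1 dimension on the combined tensor right before later ops consume it; per the commit title this is what fixes the layernorm shape mismatch and the memory overflow. A minimal plain-Python sketch of the shape change performed by the added ttnn.reshape (the helper name and example sizes are illustrative, not part of the model code):

# Illustrative only: compute the rank-4 shape used by the ttnn.reshape added
# above, inserting a size-1 dimension after the batch dimension.
def to_rank4(shape_3d):
    batch, seq_len, hidden = shape_3d
    return [batch, 1, seq_len, hidden]

# Hypothetical BERT-large activation: batch 8, sequence 384, hidden 1024.
assert to_rank4([8, 384, 1024]) == [8, 1, 384, 1024]
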
models/demos/metal_BERT_large_11/tt/mha.py (2 changes: 2 additions & 0 deletions)

@@ -48,6 +48,8 @@ def op1_qkv_fused(activation, qkv_weight, qkv_bias):
     return qkv

 grid_size = model_config.get("GRID_SIZE", device.compute_with_storage_grid_size())
+if type(grid_size) == list:
+    grid_size = tt_lib.tensor.CoreCoord(tuple(grid_size))

 def op2_create_qkv_heads(qkv):
     (
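The mha.py change normalizes GRID_SIZE: when model_config supplies it as a plain list, it is converted to the CoreCoord type that device.compute_with_storage_grid_size() returns, so the rest of the attention setup sees a single type. A minimal sketch of that normalization pattern, with a namedtuple standing in for tt_lib.tensor.CoreCoord (the helper and variable names are illustrative, and the real constructor's argument handling may differ):

from collections import namedtuple

# Stand-in for tt_lib.tensor.CoreCoord, used only to illustrate the pattern.
CoreCoord = namedtuple("CoreCoord", ["x", "y"])

def normalize_grid_size(configured, device_default):
    # Configs loaded from JSON hand back plain [x, y] lists, while the device
    # query returns a CoreCoord-like object; normalize both to one type.
    if isinstance(configured, list):
        return CoreCoord(configured[0], configured[1])
    return configured if configured is not None else device_default

print(normalize_grid_size([12, 9], CoreCoord(12, 9)))  # CoreCoord(x=12, y=9)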
