From a39f99800564fa2bb02ebb231dc13cd2b8d92794 Mon Sep 17 00:00:00 2001
From: Mark O'Connor
Date: Tue, 12 Nov 2024 11:27:06 +0000
Subject: [PATCH] #0: Fix double deallocate

---
 models/demos/llama3/tt/llama_attention.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/models/demos/llama3/tt/llama_attention.py b/models/demos/llama3/tt/llama_attention.py
index c12a1a69af5..9a51aad2a74 100644
--- a/models/demos/llama3/tt/llama_attention.py
+++ b/models/demos/llama3/tt/llama_attention.py
@@ -351,7 +351,6 @@ def forward_decode(
             dense_out_sharded, ttnn.L1_MEMORY_CONFIG
         )  # TODO: remove as soon as we have sharded support in for all CCL
-        ttnn.deallocate(attn_output_cat)
         ttnn.deallocate(dense_out_sharded)

         # All reduce
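
Note: a minimal sketch of the failure mode this patch removes. The subject
line indicates attn_output_cat is already deallocated elsewhere in
forward_decode, so the second ttnn.deallocate call deleted above would act
on an already-freed buffer. The device setup and tensor below are
illustrative assumptions, not the actual forward_decode code; only
ttnn.deallocate appears in the diff itself.

    import torch
    import ttnn

    # Assumed setup: a single available device and a small throwaway tensor.
    device = ttnn.open_device(device_id=0)
    t = ttnn.from_torch(torch.zeros(32, 32), layout=ttnn.TILE_LAYOUT, device=device)

    ttnn.deallocate(t)  # first call frees the device buffer
    ttnn.deallocate(t)  # second call is the bug: the buffer is already freed,
                        # so this is expected to fail at runtime

    ttnn.close_device(device)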