From a39f99800564fa2bb02ebb231dc13cd2b8d92794 Mon Sep 17 00:00:00 2001
From: Mark O'Connor
Date: Tue, 12 Nov 2024 11:27:06 +0000
Subject: [PATCH] #0: Fix double deallocate

---
 models/demos/llama3/tt/llama_attention.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/models/demos/llama3/tt/llama_attention.py b/models/demos/llama3/tt/llama_attention.py
index c12a1a69af5..9a51aad2a74 100644
--- a/models/demos/llama3/tt/llama_attention.py
+++ b/models/demos/llama3/tt/llama_attention.py
@@ -351,7 +351,6 @@ def forward_decode(
             dense_out_sharded, ttnn.L1_MEMORY_CONFIG
         )  # TODO: remove as soon as we have sharded support in for all CCL
-        ttnn.deallocate(attn_output_cat)
         ttnn.deallocate(dense_out_sharded)

         # All reduce
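
Note: a minimal sketch of the failure mode this patch removes. The subject
line indicates attn_output_cat is already deallocated elsewhere in
forward_decode, so the second ttnn.deallocate call deleted above would act
on an already-freed buffer. The device setup and tensor below are
illustrative assumptions, not the actual forward_decode code; only
ttnn.deallocate appears in the diff itself.

    import torch
    import ttnn

    # Assumed setup: a single available device and a small throwaway tensor.
    device = ttnn.open_device(device_id=0)
    t = ttnn.from_torch(torch.zeros(32, 32), layout=ttnn.TILE_LAYOUT, device=device)

    ttnn.deallocate(t)  # first call frees the device buffer
    ttnn.deallocate(t)  # second call is the bug: the buffer is already freed,
                        # so this is expected to fail at runtime

    ttnn.close_device(device)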