diff --git a/models/demos/llama3/tt/llama_attention.py b/models/demos/llama3/tt/llama_attention.py
index c12a1a69af5..9a51aad2a74 100644
--- a/models/demos/llama3/tt/llama_attention.py
+++ b/models/demos/llama3/tt/llama_attention.py
@@ -351,7 +351,6 @@ def forward_decode(
             dense_out_sharded, ttnn.L1_MEMORY_CONFIG
         )  # TODO: remove as soon as we have sharded support in for all CCL

-        ttnn.deallocate(attn_output_cat)
         ttnn.deallocate(dense_out_sharded)

         # All reduce