#0: move ttnn experimental transpose to ttnn transpose
ntarafdar committed Jul 28, 2024
1 parent 41537e2 commit 1ebcd4c
Showing 2 changed files with 12 additions and 12 deletions.
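Every hunk in this commit applies the same two-part change: the call site moves from ttnn.experimental.tensor.transpose to the top-level ttnn.transpose, and the output_mem_config keyword is renamed to memory_config; the dimension arguments and the memory-config values themselves are untouched. A condensed before/after view of the pattern, lifted from the hunks below:

    # Before: experimental namespace, old keyword name
    key_layer_transposed = ttnn.experimental.tensor.transpose(
        key_layer,
        -2,
        -1,
        output_mem_config=self.model_config["K_TRANSPOSED_OUTPUT_MEMCFG"],
    )

    # After: op promoted to the ttnn namespace, keyword renamed
    key_layer_transposed = ttnn.transpose(
        key_layer,
        -2,
        -1,
        memory_config=self.model_config["K_TRANSPOSED_OUTPUT_MEMCFG"],
    )
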
16 changes: 8 additions & 8 deletions models/demos/falcon7b_common/tt/falcon_attention.py
@@ -276,11 +276,11 @@ def forward(
         key_layer_transposed = []
         for i in range(self.num_devices):
             key_layer_transposed.append(
-                ttnn.experimental.tensor.transpose(
+                ttnn.transpose(
                     key_layer[i],
                     -2,
                     -1,
-                    output_mem_config=(
+                    memory_config=(
                         self.model_config["K_TRANSPOSED_OUTPUT_MEMCFG"]
                         if llm_mode == "prefill" or self.model_config["l1_sharded"] == False
                         else ttnn.experimental.tensor.MemoryConfig(
@@ -439,11 +439,11 @@ def _optimized_forward(
         key_layer_transposed = []
         for i in range(self.num_devices):
             key_layer_transposed.append(
-                ttnn.experimental.tensor.transpose(
+                ttnn.transpose(
                     key_layer[i],
                     -2,
                     -1,
-                    output_mem_config=self.model_config["K_TRANSPOSED_OUTPUT_MEMCFG"],
+                    memory_config=self.model_config["K_TRANSPOSED_OUTPUT_MEMCFG"],
                 )
             )
             key_layer[i].deallocate()
@@ -733,7 +733,7 @@ def forward(
         )
 
         for i in range(self.num_devices):
-            query_layer[i] = ttnn.experimental.tensor.transpose(
+            query_layer[i] = ttnn.transpose(
                 query_layer[i],
                 -2,
                 -3,
@@ -762,11 +762,11 @@ def forward(
         key_layer_transposed = []
         for i in range(self.num_devices):
             key_layer_transposed.append(
-                ttnn.experimental.tensor.transpose(
+                ttnn.transpose(
                     key_layer[i],
                     -2,
                     -1,
-                    output_mem_config=(
+                    memory_config=(
                         self.model_config["K_TRANSPOSED_OUTPUT_MEMCFG"]
                         if self.model_config["l1_sharded"] == False
                         else ttnn.experimental.tensor.MemoryConfig(
@@ -927,7 +927,7 @@ def forward(
 
         # Get batch in dim 2
         for i in range(self.num_devices):
-            attn_output[i] = ttnn.experimental.tensor.transpose(
+            attn_output[i] = ttnn.transpose(
                 attn_output[i],
                 -2,
                 -3,
8 changes: 4 additions & 4 deletions models/demos/t3000/falcon40b/tt/falcon_attention.py
@@ -369,11 +369,11 @@ def fwd_prefill(
                 ttnn.experimental.tensor.typecast(value_layer, self.model_config["KV_CACHE_DTYPE"]),
                 user_id,
             )
-        key_layer_transposed = ttnn.experimental.tensor.transpose(
+        key_layer_transposed = ttnn.transpose(
             key_layer,
             -2,
             -1,
-            output_mem_config=self.model_config["K_TRANSPOSED_OUTPUT_MEMCFG"],
+            memory_config=self.model_config["K_TRANSPOSED_OUTPUT_MEMCFG"],
         )
         key_layer.deallocate(True)
 
@@ -610,11 +610,11 @@ def fwd_decode(
         ### PRE-SOFTMAX MM ###
         ######################
         # TODO: Sharded transpose could be in place???
-        key_layer_transposed = ttnn.experimental.tensor.transpose(
+        key_layer_transposed = ttnn.transpose(
             key_layer,
             -2,
             -1,
-            output_mem_config=self.model_config["K_TRANSPOSED_OUTPUT_MEMCFG"],
+            memory_config=self.model_config["K_TRANSPOSED_OUTPUT_MEMCFG"],
         )
         key_layer.deallocate(True)
 
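A quick way to exercise the migrated call outside the model code is a standalone transpose of a small tile-sized tensor. This is a hedged sketch rather than part of the commit: it assumes a local device is available and that the usual ttnn host-side helpers (ttnn.open_device, ttnn.from_torch, ttnn.to_torch, ttnn.close_device) behave as in recent ttnn releases.

    import torch
    import ttnn

    # Hypothetical smoke test for the new call path; not part of this commit.
    device = ttnn.open_device(device_id=0)
    x = ttnn.from_torch(
        torch.randn(1, 1, 32, 64),
        dtype=ttnn.bfloat16,
        layout=ttnn.TILE_LAYOUT,
        device=device,
    )
    # Same signature the diff migrates to: input, the two dims, memory_config keyword.
    y = ttnn.transpose(x, -2, -1, memory_config=ttnn.DRAM_MEMORY_CONFIG)
    assert ttnn.to_torch(y).shape == (1, 1, 64, 32)
    ttnn.close_device(device)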
