Relax restrictions when inserting ttnn.to_layout and ttnn.{to,from}_device ops #322

Status: Open. This PR wants to merge 65 commits into base: main. The diff shown below reflects changes from 5 of the 65 commits.

Commits:
650152e
Relax restrictions for inserting `ttnn.to_layout` and `ttnn.{to,from}…
kevinwuTT Oct 16, 2024
7a1e9d6
Merge branch 'main' into kw/layout_device_ops
kevinwuTT Oct 16, 2024
db1c3c9
Restrict reshape for tensors with rank > 4
kevinwuTT Oct 17, 2024
fcc42ff
Fix restrictions to reshape again
kevinwuTT Oct 17, 2024
a6873b8
Consider reshape to 1-D
kevinwuTT Oct 17, 2024
b236038
Merge branch 'main' into kw/layout_device_ops
kevinwuTT Oct 18, 2024
7567f38
Keep util helper function names consistent
kevinwuTT Oct 21, 2024
9c99eba
Merge branch 'main' into kw/layout_device_ops
kevinwuTT Oct 22, 2024
61e3074
Merge branch 'main' into kw/layout_device_ops
kevinwuTT Oct 22, 2024
3f722c1
Fix reporting of device related ttnn ops
kevinwuTT Oct 22, 2024
fb16e96
Merge branch 'main' into kw/layout_device_ops
kevinwuTT Oct 22, 2024
723dc62
Remove a reshape input variant that has a workaround
kevinwuTT Oct 22, 2024
3f2bfa2
Remove check for torch.fx.Node since PsuedoNode is being used for tt_…
kevinwuTT Oct 23, 2024
86619a0
Revise some restrictions and interactions
kevinwuTT Oct 23, 2024
4d6f5cc
Merge branch 'main' into kw/layout_device_ops
kevinwuTT Oct 24, 2024
0c3de8e
Revise reshape restrictions
kevinwuTT Oct 24, 2024
6bc8976
Fix slice
kevinwuTT Oct 31, 2024
e99cb00
Merge branch 'main' into kw/layout_device_ops
kevinwuTT Oct 31, 2024
f7676e8
Merge branch 'main' into kw/layout_device_ops
kevinwuTT Nov 4, 2024
b3f0a2b
Add some inputs to blacklist for view
kevinwuTT Nov 5, 2024
6455d2c
Fix data movement with different layouts and host/device requirements…
jerrysky3 Nov 1, 2024
0aa9b3c
Remove the blocklist related to issue #358
swimdi Nov 7, 2024
67ec52f
Move aten.add.Tensor restricted from to_tt_guard_autogen to to_tt_pass
swimdi Nov 7, 2024
2b6d028
Add tests/pattern/test_vilt.py
swimdi Nov 7, 2024
fe4e4bf
Add lowering to ttnn.div and other guards
kevinwuTT Nov 7, 2024
31aac9d
Merge branch 'main' into kw/layout_device_ops
kevinwuTT Nov 7, 2024
4dac2c9
Merge branch 'stage1_swimdi_add' into kw/layout_device_ops
kevinwuTT Nov 7, 2024
6a6c7be
Fix data movement with different layouts and host/device requirements…
jerrysky3 Nov 1, 2024
0444cd0
Remove the blocklist related to issue #358
swimdi Nov 7, 2024
4910029
Move aten.add.Tensor restricted from to_tt_guard_autogen to to_tt_pass
swimdi Nov 7, 2024
c7c22dd
Add tests/pattern/test_vilt.py
swimdi Nov 7, 2024
39c812b
Not calculate_accuracy speecht5-tts in confest
swimdi Nov 8, 2024
3e6463c
Add tests/pattern/test_retinanet_pattern.py
swimdi Nov 8, 2024
1b68c5a
Relax more restrictions
kevinwuTT Nov 8, 2024
d21f1df
Merge branch 'stage1_swimdi_add' of github.com:tenstorrent/pytorch2.0…
kevinwuTT Nov 8, 2024
6f7878c
Merge branch 'stage1_swimdi_add' into kw/layout_device_ops
kevinwuTT Nov 8, 2024
5003605
Force TILE_LAYOUT and force fallback for lowerings that do not suppor…
kevinwuTT Nov 9, 2024
00c9b67
Unqueeze reshape does not support 5D
kevinwuTT Nov 11, 2024
6979151
Merge branch 'main' into kw/layout_device_ops
kevinwuTT Nov 11, 2024
449f170
Fix unsqueeze for 4D inputs
kevinwuTT Nov 11, 2024
fe33687
Clean up
kevinwuTT Nov 12, 2024
e03cd9b
Remove remaining reshape restrictions
kevinwuTT Nov 12, 2024
f8bc7db
Merge branch 'main' into kw/layout_device_ops
kevinwuTT Nov 12, 2024
6b3c2dc
Add blacklist for xglm and reshape to 1D
kevinwuTT Nov 12, 2024
5d30722
not rm autogen/all/conv
swimdi Nov 13, 2024
1a8786a
remove original_input_varations of to_torch and aten._to_copy because…
swimdi Nov 13, 2024
bca86b2
Merge branch 'main' into kw/layout_device_ops
kevinwuTT Nov 13, 2024
2836c83
Merge remote-tracking branch 'origin/fix-docs' into kw/layout_device_ops
kevinwuTT Nov 13, 2024
e9cd2a9
Optimize to_copy
kevinwuTT Nov 13, 2024
8a8fe5e
Merge branch 'main' into kw/layout_device_ops
kevinwuTT Nov 14, 2024
f2dfdd7
Rework after node layout change to mirror before node. Handle issues …
kevinwuTT Nov 15, 2024
b70b754
Include reshape when finding user of current target
kevinwuTT Nov 15, 2024
a455ffd
Use output_size for aten slice
kevinwuTT Nov 15, 2024
df6ce56
Handle input aliasing for get_attr nodes
kevinwuTT Nov 19, 2024
ab485dc
Merge branch 'main' into kw/layout_device_ops
kevinwuTT Nov 19, 2024
7cac473
Lower to fallback if input > 4D for aten.select
kevinwuTT Nov 19, 2024
62db956
Skip testing avg pool for now
kevinwuTT Nov 19, 2024
de003cc
Insert clone nodes after each get_attr instead of end
kevinwuTT Nov 19, 2024
7f98bd6
get_attr can non FakeTensor types. Only add clone to these
kevinwuTT Nov 19, 2024
d2862f5
Convert fallback unsafe_view to reshape
kevinwuTT Nov 20, 2024
c7bcb51
Merge branch 'main' into kw/layout_device_ops
kevinwuTT Nov 21, 2024
30e7f49
Merge branch 'main' into kw/layout_device_ops
kevinwuTT Nov 21, 2024
b397236
Disable reshaping or unsqueezing for outputs > 4D for now
kevinwuTT Nov 21, 2024
f3303b6
Last cleanup
kevinwuTT Nov 23, 2024
10f381a
Merge branch 'main' into kw/layout_device_ops
kevinwuTT Nov 23, 2024
tests/lowering/tensor_manipulation/test_transpose.py (1 addition, 0 deletions)
@@ -21,6 +21,7 @@ def forward(self, x, dim0, dim1):
         # If not, this runtime error will be thrown:
         # RuntimeError: TT_FATAL @ ../tt_metal/impl/buffers/buffer.cpp:41: page_size % sizeof(uint32_t) == 0
         ((5, 3, 2), 0, 2),
+        ((1, 4150, 192), 1, 2),
     ],
 )
 def test_transpose(device, input_shape, dim0, dim1):
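For context, the new case swaps dims 1 and 2 of a (1, 4150, 192) tensor, so the output's last dimension becomes 4150, which is not a multiple of the 32-element tile width. A minimal shape-only sketch of what the case exercises (plain torch, no ttnn needed):

import torch

# Transposing dims 1 and 2 yields a last dim of 4150; 4150 % 32 != 0, so
# the tensor is not tile-aligned and the relaxed layout handling has to
# choose row-major layout for it rather than failing with the page_size
# error quoted in the comment above.
x = torch.rand(1, 4150, 192)
y = x.transpose(1, 2)
assert y.shape == (1, 192, 4150)
assert y.shape[-1] % 32 != 0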
tests/lowering/tensor_manipulation/test_view.py (23 additions, 0 deletions)
@@ -80,6 +80,29 @@ def forward(self, x, new_shape):
         ((256, 4096), (1, 256, 4096)),
         ((1, 32, 16, 96), (1, 32, 1536)),
         ((1, 192, 4150), (1, 192, 50, 83)),
+        ((1, 32, 4608), (1, 32, 16, 3, 96)),
+        ((1, 100, 192), (100, 192)),
+        ((1, 1445, 192), (1, 1445, 3, 64)),
+        ((1, 1445, 192), (1445, 192)),
+        ((1, 1445, 3, 64), (1, 1445, 192)),
+        ((1, 1445, 768), (1445, 768)),
+        ((1, 192, 32, 42), (1, 192, 1344)),
+        ((1, 192, 4150), (1, 192, 50, 83)),
+        ((1, 3, 1445, 1445), (3, 1445, 1445)),
+        ((1, 3, 1445, 64), (3, 1445, 64)),
+        ((1, 3, 64, 1445), (3, 64, 1445)),
+        ((100, 192), (1, 100, 192)),
+        ((100, 4), (1, 100, 4)),
+        ((100, 92), (1, 100, 92)),
+        ((1445, 192), (1, 1445, 192)),
+        ((1445, 768), (1, 1445, 768)),
+        ((192), (1, 192, 1, 1)),
+        ((1), (1, 1, 1, 1)),
+        ((3, 1445, 1445), (1, 3, 1445, 1445)),
+        ((3, 1445, 64), (1, 3, 1445, 64)),
+        ((32), (1, 1, 32, 1)),
+        ((42), (1, 1, 1, 42)),
+        ((1, 10), (10,)),
     ],
 )
 def test_reshape(device, input_shape, new_shape, module):
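As a quick sanity sketch, a few of the newly covered reshape patterns restated with plain torch.reshape; the rank-1 and rank-5 targets are the ones that the relaxed pass (see add_data_move_pass.py below) now routes through the row-major path:

import torch

# Rank 2 -> rank 1: handled via the row-major/host reshape path.
assert torch.rand(1, 10).reshape(10).shape == (10,)
# Rank 3 -> rank 5: output rank > 4 is likewise kept off the tile path.
assert torch.rand(1, 32, 4608).reshape(1, 32, 16, 3, 96).ndim == 5
# Rank 1 -> rank 4: e.g. broadcasting a per-channel constant to NCHW.
assert torch.rand(192).reshape(1, 192, 1, 1).shape == (1, 192, 1, 1)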
torch_ttnn/passes/lowering/add_data_move_pass.py (47 additions, 25 deletions)
@@ -7,6 +7,7 @@
     TtnnBfloat16,
     TtnnUint32,
     HasValidPageSize,
+    CanBeTilized,
 )


@@ -137,13 +138,31 @@ def is_function_call(node) -> bool:
     [
         ttnn.reshape,
         ttnn.slice,
+        ttnn.full,
     ]
 )


-def can_be_tilized(node):
-    size = node.meta["val"].size()
-    return len(size) >= 2 and size[-1] % 32 == 0 and size[-2] % 32 == 0
+# FIXME: Workaround function for unsupported features for ttnn.reshape
+# BUG (https://github.com/tenstorrent/tt-metal/issues/13891)
+# BUG (https://github.com/tenstorrent/tt-metal/issues/13889)
+def can_reshape(node):
+    shape = node.meta["val"].size()
+    # Unsupported H dims
+    unsupported_H_dim = set([1, 1445, 100])
+    # Unsupported if output rank is > 4
+    return len(shape) >= 2 and shape[-2] not in unsupported_H_dim and len(shape) <= 4
+
+
+# FIXME: Workaround functions for unsupported features for ttnn.reshape
+def get_shape(node):
+    return node.meta["val"].size()
+
+
+def have_supported_ranks(src_node, dst_node):
+    dst_node_shape = get_shape(dst_node)
+    src_node_shape = get_shape(src_node)
+    return len(dst_node_shape) > 4 or len(src_node_shape) > 4 or len(dst_node_shape) == 1


 # For operations limitations
@@ -292,10 +311,11 @@ def try_add_data_move_in(src_node, dst_idx, dst_node, device) -> torch.fx.node.N
     with g.inserting_before(dst_node):
         kwargs = {}
         if (
-            (dst_node.target in TTNN_LAYOUT_CHANGE_OPS and not can_be_tilized(dst_node))
+            dst_node.target == ttnn.slice
             or dst_node.target == ttnn.embedding
             or dst_node.target == ttnn.zeros_like
             or dst_node.target == target_wrappers.repeat
+            or (dst_node.target == ttnn.reshape and have_supported_ranks(src_node, dst_node))
         ):
             kwargs["layout"] = TtnnRowMajorLayout()
         else:
@@ -306,10 +326,9 @@ def try_add_data_move_in(src_node, dst_idx, dst_node, device) -> torch.fx.node.N
         else:
             kwargs["dtype"] = TtnnBfloat16()

-        if (is_tt_compute(dst_node) and dst_node.target not in TTNN_LAYOUT_CHANGE_OPS) or (
-            dst_node.target in TTNN_LAYOUT_CHANGE_OPS and HasValidPageSize(src_node.meta["val"].size(), strict=True)
-        ):
-            kwargs["device"] = device
+        if is_tt_compute(dst_node):
+            if not (dst_node.target == ttnn.reshape and have_supported_ranks(src_node, dst_node)):
+                kwargs["device"] = device

         new_nodes.append(g.call_function(ttnn.from_torch, (src_node,), kwargs))

Expand All @@ -324,44 +343,47 @@ def try_add_layout_change_before_node(src_node, dst_idx, dst_node) -> torch.fx.n
if not is_function_call(dst_node):
return None
if (
dst_node.target not in TTNN_LAYOUT_CHANGE_OPS
or dst_idx != 0
or not is_tt(src_node)
or (dst_node.target in TTNN_LAYOUT_CHANGE_OPS and can_be_tilized(dst_node))
not is_tt(src_node)
or dst_node.target not in TTNN_LAYOUT_CHANGE_OPS
or (dst_node.target == ttnn.reshape and not have_supported_ranks(src_node, dst_node))
or (dst_node.target == ttnn.full and CanBeTilized(dst_node))
or (dst_node.target == ttnn.slice and not HasValidPageSize(dst_node, strict=True))
):
return None

g = dst_node.graph
new_nodes = []
with g.inserting_before(dst_node):
from_device = g.call_function(ttnn.from_device, (src_node,))
to_layout = g.call_function(ttnn.to_layout, (from_device, TtnnRowMajorLayout()))
new_nodes.append(g.call_function(ttnn.to_layout, (src_node, TtnnRowMajorLayout())))
if len(get_shape(dst_node)) > 4 or len(get_shape(dst_node)) == 1:
new_nodes.append(g.call_function(ttnn.from_device, (new_nodes[-1],)))

insert_node_between(src_node, dst_idx, dst_node, [from_device, to_layout])
insert_node_between(src_node, dst_idx, dst_node, new_nodes)

return to_layout
return new_nodes[-1]


def try_add_layout_change_after_node(src_node, dst_idx, dst_node, device) -> torch.fx.node.Node:
# Consider src_node is ttnn.repeat, and dst_node should be any tt_compute node that uses ttnn.repeat
if not is_function_call(src_node):
return None
if (
src_node.target not in TTNN_LAYOUT_CHANGE_OPS.union(set([target_wrappers.repeat]))
or not is_tt_compute(dst_node)
not is_tt_compute(dst_node)
or dst_node.target == ttnn.embedding
or dst_node.target == target_wrappers.repeat
or src_node.target not in TTNN_LAYOUT_CHANGE_OPS.union(set([target_wrappers.repeat]))
or (src_node.target == ttnn.reshape and can_reshape(src_node))
or (src_node.target == ttnn.full and CanBeTilized(src_node))
or (src_node.target == ttnn.slice and not HasValidPageSize(src_node, strict=True))
):
return None

g = dst_node.graph
new_nodes = []
with g.inserting_before(dst_node):
if dst_node.target != target_wrappers.repeat:
new_nodes.append(
g.call_function(ttnn.to_layout, (new_nodes[-1] if new_nodes else src_node, TtnnTileLayout()))
)
new_nodes.append(
g.call_function(ttnn.to_device, (new_nodes[-1] if new_nodes else src_node,), {"device": device})
)
new_nodes.append(g.call_function(ttnn.to_layout, (new_nodes[-1] if new_nodes else src_node, TtnnTileLayout())))
if len(get_shape(src_node)) > 4 or len(get_shape(src_node)) == 1:
new_nodes.append(g.call_function(ttnn.to_device, (new_nodes[-1], TtnnDevice())))

insert_node_between(src_node, dst_idx, dst_node, new_nodes)

Expand Down
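To make the new guards concrete, here is a shape-only restatement of have_supported_ranks and can_reshape; it assumes the same semantics as the node-based versions above, with plain tuples standing in for fx nodes:

# Shape-only sketch of the guards; the real versions read node.meta["val"].
def have_supported_ranks(src_shape, dst_shape):
    # True when the reshape must run in row-major layout (and, per
    # try_add_layout_change_before_node, move off device when the output
    # rank is > 4 or exactly 1).
    return len(dst_shape) > 4 or len(src_shape) > 4 or len(dst_shape) == 1


def can_reshape(shape):
    # True when ttnn.reshape can keep the tensor on the tile/device path;
    # H dims 1, 1445, and 100 are blocked per tt-metal issues #13889 and
    # #13891, and the output rank must be 2..4.
    return len(shape) >= 2 and shape[-2] not in {1, 1445, 100} and len(shape) <= 4


# (1, 32, 4608) -> (1, 32, 16, 3, 96): rank-5 output, so row-major + host.
assert have_supported_ranks((1, 32, 4608), (1, 32, 16, 3, 96))
# (1, 192, 50, 83): rank 4 with H = 50, so it can stay on the device path.
assert can_reshape((1, 192, 50, 83))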
torch_ttnn/passes/lowering/to_tt_pass.py (4 additions, 2 deletions)
@@ -9,6 +9,7 @@
     TtnnDramMemoryConfig,
     TtnnRowMajorLayout,
     HasValidPageSize,
+    CanBeTilized,
 )
 import numpy as np
 from typing import Tuple
@@ -526,17 +527,18 @@ def rewrite_node(node):
             # Page size must be divisible by sizeof(uint32_t) because buffers hold uint32_t values
             if shape[-1] != 1 and HasValidPageSize(shape):
                 if isinstance(args[1], float):
+                    layout = TtnnTileLayout() if CanBeTilized(shape) else TtnnRowMajorLayout()
                     new_kwargs = {
                         "fill_value": args[1],
                         "device": TtnnDevice(),
+                        "layout": layout,
                     }
                     full = g.call_function(
                         ttnn.full,
                         args=(tuple(shape),),
                         kwargs=new_kwargs,
                     )
-                    to_layout = g.call_function(ttnn.to_layout, (full,), {"layout": TtnnTileLayout()})
-                    recip = g.call_function(ttnn.reciprocal, (to_layout,), {})
+                    recip = g.call_function(ttnn.reciprocal, (full,), {})
                 else:
                     recip = g.call_function(ttnn.reciprocal, (args[1],), {})
                 return g.call_function(ttnn.mul, (args[0], recip), {})
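The net effect of this hunk: aten.div with a float scalar still lowers to ttnn.mul by a reciprocal, but the ttnn.full constant is now created directly in tile layout whenever CanBeTilized holds, dropping the separate ttnn.to_layout hop. A rough plain-torch equivalent of the lowered graph (each torch call stands in for its ttnn counterpart):

import torch

x = torch.rand(32, 64)
scalar = 4.0

full = torch.full(x.shape, scalar)  # ttnn.full, now created in tile layout
recip = torch.reciprocal(full)      # ttnn.reciprocal, no to_layout needed
out = torch.mul(x, recip)           # ttnn.mul
assert torch.allclose(out, x / scalar)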
torch_ttnn/utils.py (21 additions, 3 deletions)
@@ -9,12 +9,30 @@ def GraphCleanup(gm: torch.fx.GraphModule) -> torch.fx.GraphModule:
     return gm


+def GetShape(node_or_shape):
+    if isinstance(node_or_shape, torch.fx.node.Node):
+        if (val := node_or_shape.meta.get("val", None)) is not None:
+            return val.size()
+    elif isinstance(node_or_shape, torch.Size):
+        return node_or_shape
+
+    return None
+
+
 # Certain ops don't support certain shapes and will emit a valid_page_size error
 # RuntimeError: TT_FATAL @ ../tt_metal/impl/buffers/buffer.cpp:38: valid_page_size
 # For valid non-interleaved buffers page size 2048 must equal buffer size X. For interleaved-buffers page size should be divisible by buffer size
-def HasValidPageSize(shape, strict=False):
-    if len(shape) >= 2 and shape[-1] > 0:
-        return shape[-1] % 32 == 0 or (not strict and shape[-1] < 32)
+def HasValidPageSize(node_or_shape, strict=False):
+    if (shape := GetShape(node_or_shape)) is not None:
+        if len(shape) >= 2 and len(shape) <= 4 and shape[-1] > 0:
+            return shape[-1] % 32 == 0 or (not strict and shape[-1] < 32)
     return False
+
+
+def CanBeTilized(node_or_shape):
+    if (shape := GetShape(node_or_shape)) is not None:
+        if len(shape) >= 2 and len(shape) <= 4 and shape[-1] > 0 and shape[-2] > 1:
+            return shape[-1] % 32 == 0 or shape[-2] % 32 == 0
+    return False

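A quick usage sketch for the reworked helpers, assuming they are imported from torch_ttnn.utils as defined above. Both now accept either an fx node carrying a "val" meta entry or a plain torch.Size, and both reject ranks outside 2..4:

import torch

from torch_ttnn.utils import CanBeTilized, HasValidPageSize

assert HasValidPageSize(torch.Size([1, 64]))              # W % 32 == 0
assert not HasValidPageSize(torch.Size([1, 2, 3, 4, 5]))  # rank > 4 is now rejected
assert CanBeTilized(torch.Size([2, 64]))                  # H > 1 and W tile-aligned
assert not CanBeTilized(torch.Size([1, 64]))              # H == 1 cannot be tilized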