TTNN sharded to interleaved fail in MNIST #795

Open
jmalone-tt opened this issue Feb 26, 2025 · 2 comments
Comments

@jmalone-tt
Collaborator

The forward pass of the MNIST model fails on latest main.

Repro

pytest tests/models/mnist/test_mnist.py

Log

    def forward(self, arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1):
        ttnn_from_torch = ttnn_decorators_ttnn_from_torch(arg8_1, device = ttnn_Specified_Device, layout = ttnn_TILE_LAYOUT, dtype = ttnn_bfloat16);  arg8_1 = None
        ttnn_permute = ttnn_decorators_ttnn_permute(ttnn_from_torch, (0, 2, 3, 1));  ttnn_from_torch = None
        ttnn_from_device = ttnn_decorators_ttnn_from_device(ttnn_permute);  ttnn_permute = None
        ttnn_to_layout_1 = ttnn_decorators_ttnn_to_layout(ttnn_from_device, ttnn_ROW_MAJOR_LAYOUT);  ttnn_from_device = None
        ttnn_reshape = ttnn_decorators_ttnn_reshape(ttnn_to_layout_1, (1, 1, 784, 1));  ttnn_to_layout_1 = None
        ttnn_from_torch_1 = ttnn_decorators_ttnn_from_torch(arg1_1, layout = ttnn_ROW_MAJOR_LAYOUT, dtype = ttnn_bfloat16);  arg1_1 = None
        ttnn_reshape_1 = ttnn_decorators_ttnn_reshape(ttnn_from_torch_1, (1, 1, 1, 32));  ttnn_from_torch_1 = None
        ttnn_from_device_1 = ttnn_decorators_ttnn_from_device(ttnn_reshape_1);  ttnn_reshape_1 = None
        ttnn_to_layout_2 = ttnn_decorators_ttnn_to_layout(ttnn_from_device_1, ttnn_TILE_LAYOUT);  ttnn_from_device_1 = None
        ttnn_to_device = ttnn_decorators_ttnn_to_device(ttnn_to_layout_2, device = ttnn_Specified_Device);  ttnn_to_layout_2 = None
        move_to_host = torch_ttnn_passes_lowering_target_wrappers_move_to_host(ttnn_to_device, ttnn_ROW_MAJOR_LAYOUT);  ttnn_to_device = None
        ttnn_from_device_2 = ttnn_decorators_ttnn_from_device(ttnn_reshape);  ttnn_reshape = None
        ttnn_to_layout_3 = ttnn_decorators_ttnn_to_layout(ttnn_from_device_2, ttnn_TILE_LAYOUT);  ttnn_from_device_2 = None
        ttnn_to_device_1 = ttnn_decorators_ttnn_to_device(ttnn_to_layout_3, device = ttnn_Specified_Device);  ttnn_to_layout_3 = None
        ttnn_from_torch_2 = ttnn_decorators_ttnn_from_torch(arg0_1, layout = ttnn_ROW_MAJOR_LAYOUT, dtype = ttnn_bfloat16);  arg0_1 = None
        conv = torch_ttnn_passes_lowering_target_wrappers_conv(ttnn_to_device_1, ttnn_from_torch_2, move_to_host, 1, 1, 32, [28, 28], [3, 3], [1, 1], [0, 0], [1, 1], 1, ttnn_Specified_Device, False, None);  ttnn_to_device_1 = ttnn_from_torch_2 = move_to_host = None
        ttnn_sharded_to_interleaved = ttnn_decorators_ttnn_sharded_to_interleaved(conv, ttnn_L1_MEMORY_CONFIG);  conv = None
        ttnn_from_device_3 = ttnn_decorators_ttnn_from_device(ttnn_sharded_to_interleaved);  ttnn_sharded_to_interleaved = None
        ttnn_to_layout_4 = ttnn_decorators_ttnn_to_layout(ttnn_from_device_3, ttnn_ROW_MAJOR_LAYOUT);  ttnn_from_device_3 = None
        ttnn_reshape_2 = ttnn_decorators_ttnn_reshape(ttnn_to_layout_4, (1, 26, 26, 32));  ttnn_to_layout_4 = None
        ttnn_from_device_4 = ttnn_decorators_ttnn_from_device(ttnn_reshape_2);  ttnn_reshape_2 = None
        ttnn_to_layout_5 = ttnn_decorators_ttnn_to_layout(ttnn_from_device_4, ttnn_TILE_LAYOUT);  ttnn_from_device_4 = None
        ttnn_to_device_2 = ttnn_decorators_ttnn_to_device(ttnn_to_layout_5, device = ttnn_Specified_Device);  ttnn_to_layout_5 = None
        ttnn_permute_1 = ttnn_decorators_ttnn_permute(ttnn_to_device_2, (0, 3, 1, 2));  ttnn_to_device_2 = None
        ttnn_relu = ttnn_decorators_ttnn_relu(ttnn_permute_1);  ttnn_permute_1 = None
        ttnn_permute_2 = ttnn_decorators_ttnn_permute(ttnn_relu, (0, 2, 3, 1));  ttnn_relu = None
        ttnn_from_device_5 = ttnn_decorators_ttnn_from_device(ttnn_permute_2);  ttnn_permute_2 = None
        ttnn_to_layout_6 = ttnn_decorators_ttnn_to_layout(ttnn_from_device_5, ttnn_ROW_MAJOR_LAYOUT);  ttnn_from_device_5 = None
        ttnn_reshape_3 = ttnn_decorators_ttnn_reshape(ttnn_to_layout_6, (1, 1, 676, 32));  ttnn_to_layout_6 = None
        ttnn_from_torch_3 = ttnn_decorators_ttnn_from_torch(arg3_1, layout = ttnn_ROW_MAJOR_LAYOUT, dtype = ttnn_bfloat16);  arg3_1 = None
        ttnn_reshape_4 = ttnn_decorators_ttnn_reshape(ttnn_from_torch_3, (1, 1, 1, 64));  ttnn_from_torch_3 = None
        ttnn_from_device_6 = ttnn_decorators_ttnn_from_device(ttnn_reshape_4);  ttnn_reshape_4 = None
        ttnn_to_layout_7 = ttnn_decorators_ttnn_to_layout(ttnn_from_device_6, ttnn_TILE_LAYOUT);  ttnn_from_device_6 = None
        ttnn_to_device_3 = ttnn_decorators_ttnn_to_device(ttnn_to_layout_7, device = ttnn_Specified_Device);  ttnn_to_layout_7 = None
        move_to_host_1 = torch_ttnn_passes_lowering_target_wrappers_move_to_host(ttnn_to_device_3, ttnn_ROW_MAJOR_LAYOUT);  ttnn_to_device_3 = None
        ttnn_from_device_7 = ttnn_decorators_ttnn_from_device(ttnn_reshape_3);  ttnn_reshape_3 = None
        ttnn_to_layout_8 = ttnn_decorators_ttnn_to_layout(ttnn_from_device_7, ttnn_TILE_LAYOUT);  ttnn_from_device_7 = None
        ttnn_to_device_4 = ttnn_decorators_ttnn_to_device(ttnn_to_layout_8, device = ttnn_Specified_Device);  ttnn_to_layout_8 = None
        ttnn_from_torch_4 = ttnn_decorators_ttnn_from_torch(arg2_1, layout = ttnn_ROW_MAJOR_LAYOUT, dtype = ttnn_bfloat16);  arg2_1 = None
        conv_1 = torch_ttnn_passes_lowering_target_wrappers_conv(ttnn_to_device_4, ttnn_from_torch_4, move_to_host_1, 1, 32, 64, [26, 26], [3, 3], [1, 1], [0, 0], [1, 1], 1, ttnn_Specified_Device, False, None);  ttnn_to_device_4 = ttnn_from_torch_4 = move_to_host_1 = None
        ttnn_sharded_to_interleaved_1 = ttnn_decorators_ttnn_sharded_to_interleaved(conv_1, ttnn_L1_MEMORY_CONFIG);  conv_1 = None
        ttnn_from_device_8 = ttnn_decorators_ttnn_from_device(ttnn_sharded_to_interleaved_1);  ttnn_sharded_to_interleaved_1 = None
        ttnn_to_layout_9 = ttnn_decorators_ttnn_to_layout(ttnn_from_device_8, ttnn_ROW_MAJOR_LAYOUT);  ttnn_from_device_8 = None
        ttnn_reshape_5 = ttnn_decorators_ttnn_reshape(ttnn_to_layout_9, (1, 24, 24, 64));  ttnn_to_layout_9 = None
        ttnn_from_device_9 = ttnn_decorators_ttnn_from_device(ttnn_reshape_5);  ttnn_reshape_5 = None
        ttnn_to_layout_10 = ttnn_decorators_ttnn_to_layout(ttnn_from_device_9, ttnn_TILE_LAYOUT);  ttnn_from_device_9 = None
        ttnn_to_device_5 = ttnn_decorators_ttnn_to_device(ttnn_to_layout_10, device = ttnn_Specified_Device);  ttnn_to_layout_10 = None
        ttnn_permute_3 = ttnn_decorators_ttnn_permute(ttnn_to_device_5, (0, 3, 1, 2));  ttnn_to_device_5 = None
        ttnn_relu_1 = ttnn_decorators_ttnn_relu(ttnn_permute_3);  ttnn_permute_3 = None
        ttnn_permute_4 = ttnn_decorators_ttnn_permute(ttnn_relu_1, (0, 2, 3, 1));  ttnn_relu_1 = None
        ttnn_from_device_10 = ttnn_decorators_ttnn_from_device(ttnn_permute_4);  ttnn_permute_4 = None
        ttnn_to_layout_11 = ttnn_decorators_ttnn_to_layout(ttnn_from_device_10, ttnn_ROW_MAJOR_LAYOUT);  ttnn_from_device_10 = None
        ttnn_reshape_6 = ttnn_decorators_ttnn_reshape(ttnn_to_layout_11, (1, 1, 576, 64));  ttnn_to_layout_11 = None
        ttnn_from_device_11 = ttnn_decorators_ttnn_from_device(ttnn_reshape_6);  ttnn_reshape_6 = None
        ttnn_to_layout_12 = ttnn_decorators_ttnn_to_layout(ttnn_from_device_11, ttnn_TILE_LAYOUT);  ttnn_from_device_11 = None
        ttnn_to_device_6 = ttnn_decorators_ttnn_to_device(ttnn_to_layout_12, device = ttnn_Specified_Device);  ttnn_to_layout_12 = None
        ttnn_to_layout = ttnn_decorators_ttnn_to_layout(ttnn_to_device_6, ttnn_ROW_MAJOR_LAYOUT);  ttnn_to_device_6 = None
        ttnn_max_pool2d = ttnn_decorators_ttnn_max_pool2d(ttnn_to_layout, 1, 24, 24, 64, [2, 2], [2, 2], (0, 0), (1, 1));  ttnn_to_layout = None
>       ttnn_sharded_to_interleaved_2 = ttnn_decorators_ttnn_sharded_to_interleaved(ttnn_max_pool2d, ttnn_L1_MEMORY_CONFIG);  ttnn_max_pool2d = None

<eval_with_key>.49:63:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = FastOperation(python_fully_qualified_name='ttnn.sharded_to_interleaved', function=<ttnn._ttnn.operations.data_movement...<function default_postprocess_golden_function_outputs at 0x7fd63ae9f0a0>, is_cpp_operation=True, is_experimental=False)
function_args = (ttnn.Tensor(<buffer is not allocated>, shape=Shape([1, 1, 144, 64]), dtype=DataType::BFLOAT16, layout=Layout::ROW_MAJOR), MemoryConfig(memory_layout=TensorMemoryLayout::INTERLEAVED,buffer_type=BufferType::L1,shard_spec=std::nullopt))
function_kwargs = {}

    def __call__(self, *function_args, **function_kwargs):
>       return self.function(*function_args, **function_kwargs)
E       RuntimeError: TT_ASSERT @ /home/ubuntu/tt-metal/ttnn/cpp/ttnn/operations/data_movement/sharded/sharded_to_interleaved/device/sharded_to_interleaved_program_factory.cpp:82: num_cores_unpadded == num_cores
E       info:
E       number of cores 48 in shard spec not equal to the unpadded number of cores 54
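
For context, here is a minimal PyTorch sketch of the op sequence that hits the assert, reconstructed from the traced graph above. The layer names and module structure are assumptions; only the channel counts, kernel sizes, and shapes are taken from the graph:

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

# Illustrative reconstruction of the failing forward path (not the actual
# model code in tests/models/mnist/test_mnist.py): two 3x3 convs with ReLU,
# then a 2x2 max pool, matching the traced graph above.
class MnistForwardSketch(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1)   # 28x28 -> 26x26
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1)  # 26x26 -> 24x24

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        # This pooling is lowered to ttnn.max_pool2d followed by
        # ttnn.sharded_to_interleaved, which is where the TT_ASSERT fires.
        x = F.max_pool2d(x, 2)                                    # 24x24 -> 12x12
        return x

# Host-side shape check: (1, 64, 12, 12), i.e. 12*12 = 144 spatial elements,
# matching the Shape([1, 1, 144, 64]) tensor passed to ttnn.sharded_to_interleaved.
print(MnistForwardSketch()(torch.randn(1, 1, 28, 28)).shape)
```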
@jmalone-tt added the "bug" label on Feb 26, 2025
@jmalone-tt
Collaborator Author

Temporarily marking this as compilation_xfail, but the fix still needs to be investigated.
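
A minimal sketch of what that temporary marker could look like, assuming a plain pytest.mark.xfail is used; the repo's actual compilation_xfail mechanism and test name may differ:

```python
import pytest

# Hypothetical placement; test name and marker mechanism are illustrative only.
@pytest.mark.xfail(
    reason="ttnn.sharded_to_interleaved asserts num_cores_unpadded == num_cores (#795)"
)
def test_mnist():
    ...
```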

@jmalone-tt changed the title from "TTNN shared to interleaved fail in MNIST" to "TTNN sharded to interleaved fail in MNIST" on Feb 26, 2025
@jmalone-tt
Collaborator Author

Looks like this also shows up in:

pytest tests/models/torchvision/test_torchvision_object_detection.py

due to ssd300_vgg16

Log

RuntimeError: TT_ASSERT @ /home/ubuntu/tt-metal/ttnn/cpp/ttnn/operations/data_movement/sharded/sharded_to_interleaved/device/sharded_to_interleaved_program_factory.cpp:82: num_cores_unpadded == num_cores
info:
number of cores 61 in shard spec not equal to the unpadded number of cores 64

@jmalone-tt removed the "bug" label on Feb 27, 2025