Test convert atol regression #293

Open
sgligorijevicTT opened this issue Mar 3, 2025 · 2 comments

@sgligorijevicTT (Contributor) commented:

test_convert[int32-uint16] regressed after the latest tt-mlir uplift, going from passing to failing because the output error exceeds the allowed atol.
This is likely related to tenstorrent/tt-mlir#2272.
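
For context, a minimal sketch of what such a conversion test boils down to (the actual tt-xla test harness, parametrization order, and tolerance are not shown in this issue; the names and the atol value below are illustrative):

```python
import jax
import jax.numpy as jnp
import numpy as np

def convert(x):
    # Cast uint16 input to int32, matching the ui16 -> i32 graph above.
    return x.astype(jnp.int32)

x = np.arange(32 * 32, dtype=np.uint16).reshape(32, 32)
device_out = np.asarray(jax.jit(convert)(x))   # runs through the tt PJRT plugin when registered
golden = x.astype(np.int32)                    # host-side reference

# The regression shows up here: the elementwise error exceeds the tolerance budget.
assert np.allclose(device_out, golden, atol=1e-2), np.abs(device_out - golden).max()
```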

@sgligorijevicTT (Contributor, Author) commented:

TTIR and TTNN graphs before uplift:

module @jit_convert attributes {mhlo.num_partitions = 1 : i32, mhlo.num_replicas = 1 : i32} {
  func.func public @main(%arg0: tensor<32x32xui16> {mhlo.sharding = "{replicated}"}) -> (tensor<32x32xi32> {jax.result_info = ""}) {
    %0 = tensor.empty() : tensor<32x32xi32>
    %1 = "ttir.typecast"(%arg0, %0) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<32x32xui16>, tensor<32x32xi32>) -> tensor<32x32xi32>
    return %1 : tensor<32x32xi32>
  }
}
module @jit_convert attributes {mhlo.num_partitions = 1 : i32, mhlo.num_replicas = 1 : i32, tt.device = #tt.device<workerGrid = #tt.grid<8x8, (d0, d1) -> (0, d0, d1)>, l1Map = (d0, d1)[s0, s1] -> (0, d0 floordiv s0, d1 floordiv s1, (d0 mod s0) * s1 + d1 mod s1), dramMap = (d0, d1)[s0, s1] -> (0, 0, ((((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) floordiv 8192) mod 12, (((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) floordiv 98304 + (((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) mod 8192), meshShape = , chipIds = [0]>, tt.system_desc = #tt.system_desc<[{role = host, target_triple = "x86_64-pc-linux-gnu"}], [{arch = <wormhole_b0>, grid = 8x8, l1_size = 1499136, num_dram_channels = 12, dram_channel_size = 1073741824, noc_l1_address_align_bytes = 16, pcie_address_align_bytes = 32, noc_dram_address_align_bytes = 32, l1_unreserved_base = 1024, erisc_l1_unreserved_base = 1024, dram_unreserved_base = 1024, dram_unreserved_end = 1073741824, physical_cores = {worker = [ 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7,  1x0,  1x1,  1x2,  1x3,  1x4,  1x5,  1x6,  1x7,  2x0,  2x1,  2x2,  2x3,  2x4,  2x5,  2x6,  2x7,  3x0,  3x1,  3x2,  3x3,  3x4,  3x5,  3x6,  3x7,  4x0,  4x1,  4x2,  4x3,  4x4,  4x5,  4x6,  4x7,  5x0,  5x1,  5x2,  5x3,  5x4,  5x5,  5x6,  5x7,  6x0,  6x1,  6x2,  6x3,  6x4,  6x5,  6x6,  6x7,  7x0,  7x1,  7x2,  7x3,  7x4,  7x5,  7x6,  7x7] dram = [ 8x0,  9x0,  10x0,  8x1,  9x1,  10x1,  8x2,  9x2,  10x2,  8x3,  9x3,  10x3]}, supported_data_types = [<f32>, <f16>, <bf16>, <bfp_f8>, <bfp_bf8>, <bfp_f4>, <bfp_bf4>, <bfp_f2>, <bfp_bf2>, <u32>, <u16>, <u8>], supported_tile_sizes = [ 4x16,  16x16,  32x16,  4x32,  16x32,  32x32], num_cbs = 32}], [0], [3 : i32], [ 0x0x0x0]>} {
  func.func public @main(%arg0: tensor<32x32xui16, #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <1x1>, memref<1x1x!tt.tile<32x32, u16>, #ttnn.buffer_type<dram>>, <interleaved>>> {mhlo.sharding = "{replicated}"}) -> (tensor<32x32xui32, #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <1x1>, memref<1x1x!tt.tile<32x32, u32>, #ttnn.buffer_type<dram>>, <interleaved>>> {jax.result_info = ""}) {
    %0 = "ttnn.typecast"(%arg0) <{dtype = #tt.supportedDataTypes<u32>}> : (tensor<32x32xui16, #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <1x1>, memref<1x1x!tt.tile<32x32, u16>, #ttnn.buffer_type<dram>>, <interleaved>>>) -> tensor<32x32xui32, #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <1x1>, memref<1x1x!tt.tile<32x32, u32>, #ttnn.buffer_type<dram>>, <interleaved>>>
    return %0 : tensor<32x32xui32, #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <1x1>, memref<1x1x!tt.tile<32x32, u32>, #ttnn.buffer_type<dram>>, <interleaved>>>
  }
}

TTIR and TTNN graphs after uplift:

module @jit_convert attributes {mhlo.num_partitions = 1 : i32, mhlo.num_replicas = 1 : i32} {
  func.func public @main(%arg0: tensor<32x32xui16> {mhlo.sharding = "{replicated}"}) -> (tensor<32x32xi32> {jax.result_info = ""}) {
    %0 = tensor.empty() : tensor<32x32xi32>
    %1 = "ttir.typecast"(%arg0, %0) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<32x32xui16>, tensor<32x32xi32>) -> tensor<32x32xi32>
    return %1 : tensor<32x32xi32>
  }
}
module @jit_convert attributes {mhlo.num_partitions = 1 : i32, mhlo.num_replicas = 1 : i32, tt.device = #tt.device<workerGrid = #tt.grid<8x8, (d0, d1) -> (0, d0, d1)>, l1Map = (d0, d1)[s0, s1] -> (0, d0 floordiv s0, d1 floordiv s1, (d0 mod s0) * s1 + d1 mod s1), dramMap = (d0, d1)[s0, s1] -> (0, 0, ((((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) floordiv 8192) mod 12, (((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) floordiv 98304 + (((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) mod 8192), meshShape = , chipIds = [0]>, tt.system_desc = #tt.system_desc<[{role = host, target_triple = "x86_64-pc-linux-gnu"}], [{arch = <wormhole_b0>, grid = 8x8, l1_size = 1499136, num_dram_channels = 12, dram_channel_size = 1073741824, noc_l1_address_align_bytes = 16, pcie_address_align_bytes = 32, noc_dram_address_align_bytes = 32, l1_unreserved_base = 1024, erisc_l1_unreserved_base = 1024, dram_unreserved_base = 1024, dram_unreserved_end = 1073741824, physical_cores = {worker = [ 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7,  1x0,  1x1,  1x2,  1x3,  1x4,  1x5,  1x6,  1x7,  2x0,  2x1,  2x2,  2x3,  2x4,  2x5,  2x6,  2x7,  3x0,  3x1,  3x2,  3x3,  3x4,  3x5,  3x6,  3x7,  4x0,  4x1,  4x2,  4x3,  4x4,  4x5,  4x6,  4x7,  5x0,  5x1,  5x2,  5x3,  5x4,  5x5,  5x6,  5x7,  6x0,  6x1,  6x2,  6x3,  6x4,  6x5,  6x6,  6x7,  7x0,  7x1,  7x2,  7x3,  7x4,  7x5,  7x6,  7x7] dram = [ 8x0,  9x0,  10x0,  8x1,  9x1,  10x1,  8x2,  9x2,  10x2,  8x3,  9x3,  10x3]}, supported_data_types = [<f32>, <f16>, <bf16>, <bfp_f8>, <bfp_bf8>, <bfp_f4>, <bfp_bf4>, <bfp_f2>, <bfp_bf2>, <u32>, <u16>, <u8>, <si32>], supported_tile_sizes = [ 4x16,  16x16,  32x16,  4x32,  16x32,  32x32], num_cbs = 32}], [0], [3 : i32], [ 0x0x0x0]>} {
  func.func public @main(%arg0: tensor<32x32xui16, #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <1x1>, memref<1x1x!tt.tile<32x32, u16>, #ttnn.buffer_type<dram>>, <interleaved>>> {mhlo.sharding = "{replicated}"}) -> (tensor<32x32xsi32, #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <1x1>, memref<1x1x!tt.tile<32x32, si32>, #ttnn.buffer_type<dram>>, <interleaved>>> {jax.result_info = ""}) {
    %0 = "ttnn.typecast"(%arg0) <{dtype = #tt.supportedDataTypes<si32>}> : (tensor<32x32xui16, #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <1x1>, memref<1x1x!tt.tile<32x32, u16>, #ttnn.buffer_type<dram>>, <interleaved>>>) -> tensor<32x32xsi32, #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <1x1>, memref<1x1x!tt.tile<32x32, si32>, #ttnn.buffer_type<dram>>, <interleaved>>>
    return %0 : tensor<32x32xsi32, #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <1x1>, memref<1x1x!tt.tile<32x32, si32>, #ttnn.buffer_type<dram>>, <interleaved>>>
  }
}
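
For what it's worth, the only functional difference between the two TTNN lowerings is the typecast target dtype (u32 before the uplift, si32 after). Every uint16 value is representable in both, so the dtype change by itself should not change the numeric result; a quick host-side check of that claim (illustrative, not part of the issue):

```python
import numpy as np

x = np.arange(2**16, dtype=np.uint16)   # every possible uint16 value
as_u32 = x.astype(np.uint32)            # pre-uplift typecast target
as_i32 = x.astype(np.int32)             # post-uplift typecast target

# uint16 fits in both uint32 and int32, so the two casts agree exactly.
assert np.array_equal(as_u32, as_i32.astype(np.uint32))
```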

sgligorijevicTT added a commit that referenced this issue Mar 3, 2025
This PR uplifts tt-mlir to the latest version and adds support for int32
buffers.
It also skips a couple of dtype conversion tests.
`test_convert[int64-uint16]` was broken from the start and was passing
erroneously before. `test_convert[int32-uint16]` appears to be a real
regression; we will have to investigate what is going on later.

Related issues: #293 #294
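
The skips mentioned above would typically be expressed with pytest marks along these lines (illustrative sketch only; the parametrization order, dtype spelling, and test body in the real tt-xla test file may differ):

```python
import numpy as np
import pytest

@pytest.mark.parametrize(
    "from_dtype, to_dtype",
    [
        pytest.param(np.uint16, np.int64,
                     marks=pytest.mark.skip(reason="broken from the start, passed erroneously (#293)")),
        pytest.param(np.uint16, np.int32,
                     marks=pytest.mark.skip(reason="atol regression after tt-mlir uplift (#293)")),
        (np.uint16, np.uint32),  # example of a case left enabled
    ],
)
def test_convert(from_dtype, to_dtype):
    ...  # body elided; the real test compares device output against a host-side cast
```
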
@mrakitaTT (Contributor) commented:

The graphs seem fine, so I suspect the problem is either in the runtime (metal) or in PJRT buffer creation from the si32 tensor. I would first verify with ttrt that the runtime creates a tensor with the expected values.
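
One cheap way to separate the two hypotheses before reaching for ttrt (a sketch, assuming the tt PJRT plugin is registered as the JAX backend) is to round-trip an int32 tensor through device memory with no compute at all:

```python
import jax
import numpy as np

host = np.arange(32 * 32, dtype=np.int32).reshape(32, 32)
device_buf = jax.device_put(host)    # creates the device buffer through PJRT
roundtrip = np.asarray(device_buf)   # copies it back to the host

# If this fails, si32 buffer creation/readback is the culprit rather than the typecast kernel.
np.testing.assert_array_equal(roundtrip, host)
```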
