Test convert atol regression #293

Open
sgligorijevicTT opened this issue Mar 3, 2025 · 2 comments

@sgligorijevicTT (Contributor) commented:

test_convert[int32-uint16] regressed after the latest tt-mlir uplift, going from passing to failing because the output error exceeds the allowed atol.
This is likely related to tenstorrent/tt-mlir#2272.
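
For context, a minimal sketch of what such a conversion test boils down to (the actual tt-xla test harness, parametrization order, and tolerance are not shown in this issue; the names and the atol value below are illustrative):

```python
import jax
import jax.numpy as jnp
import numpy as np

def convert(x):
    # Cast uint16 input to int32, matching the ui16 -> i32 graph above.
    return x.astype(jnp.int32)

x = np.arange(32 * 32, dtype=np.uint16).reshape(32, 32)
device_out = np.asarray(jax.jit(convert)(x))   # runs through the tt PJRT plugin when registered
golden = x.astype(np.int32)                    # host-side reference

# The regression shows up here: the elementwise error exceeds the tolerance budget.
assert np.allclose(device_out, golden, atol=1e-2), np.abs(device_out - golden).max()
```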

@sgligorijevicTT (Contributor, Author) commented:

TTIR and TTNN graphs before uplift:

module @jit_convert attributes {mhlo.num_partitions = 1 : i32, mhlo.num_replicas = 1 : i32} {
  func.func public @main(%arg0: tensor<32x32xui16> {mhlo.sharding = "{replicated}"}) -> (tensor<32x32xi32> {jax.result_info = ""}) {
    %0 = tensor.empty() : tensor<32x32xi32>
    %1 = "ttir.typecast"(%arg0, %0) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<32x32xui16>, tensor<32x32xi32>) -> tensor<32x32xi32>
    return %1 : tensor<32x32xi32>
  }
}
module @jit_convert attributes {mhlo.num_partitions = 1 : i32, mhlo.num_replicas = 1 : i32, tt.device = #tt.device<workerGrid = #tt.grid<8x8, (d0, d1) -> (0, d0, d1)>, l1Map = (d0, d1)[s0, s1] -> (0, d0 floordiv s0, d1 floordiv s1, (d0 mod s0) * s1 + d1 mod s1), dramMap = (d0, d1)[s0, s1] -> (0, 0, ((((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) floordiv 8192) mod 12, (((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) floordiv 98304 + (((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) mod 8192), meshShape = , chipIds = [0]>, tt.system_desc = #tt.system_desc<[{role = host, target_triple = "x86_64-pc-linux-gnu"}], [{arch = <wormhole_b0>, grid = 8x8, l1_size = 1499136, num_dram_channels = 12, dram_channel_size = 1073741824, noc_l1_address_align_bytes = 16, pcie_address_align_bytes = 32, noc_dram_address_align_bytes = 32, l1_unreserved_base = 1024, erisc_l1_unreserved_base = 1024, dram_unreserved_base = 1024, dram_unreserved_end = 1073741824, physical_cores = {worker = [ 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7,  1x0,  1x1,  1x2,  1x3,  1x4,  1x5,  1x6,  1x7,  2x0,  2x1,  2x2,  2x3,  2x4,  2x5,  2x6,  2x7,  3x0,  3x1,  3x2,  3x3,  3x4,  3x5,  3x6,  3x7,  4x0,  4x1,  4x2,  4x3,  4x4,  4x5,  4x6,  4x7,  5x0,  5x1,  5x2,  5x3,  5x4,  5x5,  5x6,  5x7,  6x0,  6x1,  6x2,  6x3,  6x4,  6x5,  6x6,  6x7,  7x0,  7x1,  7x2,  7x3,  7x4,  7x5,  7x6,  7x7] dram = [ 8x0,  9x0,  10x0,  8x1,  9x1,  10x1,  8x2,  9x2,  10x2,  8x3,  9x3,  10x3]}, supported_data_types = [<f32>, <f16>, <bf16>, <bfp_f8>, <bfp_bf8>, <bfp_f4>, <bfp_bf4>, <bfp_f2>, <bfp_bf2>, <u32>, <u16>, <u8>], supported_tile_sizes = [ 4x16,  16x16,  32x16,  4x32,  16x32,  32x32], num_cbs = 32}], [0], [3 : i32], [ 0x0x0x0]>} {
  func.func public @main(%arg0: tensor<32x32xui16, #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <1x1>, memref<1x1x!tt.tile<32x32, u16>, #ttnn.buffer_type<dram>>, <interleaved>>> {mhlo.sharding = "{replicated}"}) -> (tensor<32x32xui32, #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <1x1>, memref<1x1x!tt.tile<32x32, u32>, #ttnn.buffer_type<dram>>, <interleaved>>> {jax.result_info = ""}) {
    %0 = "ttnn.typecast"(%arg0) <{dtype = #tt.supportedDataTypes<u32>}> : (tensor<32x32xui16, #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <1x1>, memref<1x1x!tt.tile<32x32, u16>, #ttnn.buffer_type<dram>>, <interleaved>>>) -> tensor<32x32xui32, #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <1x1>, memref<1x1x!tt.tile<32x32, u32>, #ttnn.buffer_type<dram>>, <interleaved>>>
    return %0 : tensor<32x32xui32, #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <1x1>, memref<1x1x!tt.tile<32x32, u32>, #ttnn.buffer_type<dram>>, <interleaved>>>
  }
}

TTIR and TTNN graphs after uplift:

module @jit_convert attributes {mhlo.num_partitions = 1 : i32, mhlo.num_replicas = 1 : i32} {
  func.func public @main(%arg0: tensor<32x32xui16> {mhlo.sharding = "{replicated}"}) -> (tensor<32x32xi32> {jax.result_info = ""}) {
    %0 = tensor.empty() : tensor<32x32xi32>
    %1 = "ttir.typecast"(%arg0, %0) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<32x32xui16>, tensor<32x32xi32>) -> tensor<32x32xi32>
    return %1 : tensor<32x32xi32>
  }
}
module @jit_convert attributes {mhlo.num_partitions = 1 : i32, mhlo.num_replicas = 1 : i32, tt.device = #tt.device<workerGrid = #tt.grid<8x8, (d0, d1) -> (0, d0, d1)>, l1Map = (d0, d1)[s0, s1] -> (0, d0 floordiv s0, d1 floordiv s1, (d0 mod s0) * s1 + d1 mod s1), dramMap = (d0, d1)[s0, s1] -> (0, 0, ((((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) floordiv 8192) mod 12, (((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) floordiv 98304 + (((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) mod 8192), meshShape = , chipIds = [0]>, tt.system_desc = #tt.system_desc<[{role = host, target_triple = "x86_64-pc-linux-gnu"}], [{arch = <wormhole_b0>, grid = 8x8, l1_size = 1499136, num_dram_channels = 12, dram_channel_size = 1073741824, noc_l1_address_align_bytes = 16, pcie_address_align_bytes = 32, noc_dram_address_align_bytes = 32, l1_unreserved_base = 1024, erisc_l1_unreserved_base = 1024, dram_unreserved_base = 1024, dram_unreserved_end = 1073741824, physical_cores = {worker = [ 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7,  1x0,  1x1,  1x2,  1x3,  1x4,  1x5,  1x6,  1x7,  2x0,  2x1,  2x2,  2x3,  2x4,  2x5,  2x6,  2x7,  3x0,  3x1,  3x2,  3x3,  3x4,  3x5,  3x6,  3x7,  4x0,  4x1,  4x2,  4x3,  4x4,  4x5,  4x6,  4x7,  5x0,  5x1,  5x2,  5x3,  5x4,  5x5,  5x6,  5x7,  6x0,  6x1,  6x2,  6x3,  6x4,  6x5,  6x6,  6x7,  7x0,  7x1,  7x2,  7x3,  7x4,  7x5,  7x6,  7x7] dram = [ 8x0,  9x0,  10x0,  8x1,  9x1,  10x1,  8x2,  9x2,  10x2,  8x3,  9x3,  10x3]}, supported_data_types = [<f32>, <f16>, <bf16>, <bfp_f8>, <bfp_bf8>, <bfp_f4>, <bfp_bf4>, <bfp_f2>, <bfp_bf2>, <u32>, <u16>, <u8>, <si32>], supported_tile_sizes = [ 4x16,  16x16,  32x16,  4x32,  16x32,  32x32], num_cbs = 32}], [0], [3 : i32], [ 0x0x0x0]>} {
  func.func public @main(%arg0: tensor<32x32xui16, #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <1x1>, memref<1x1x!tt.tile<32x32, u16>, #ttnn.buffer_type<dram>>, <interleaved>>> {mhlo.sharding = "{replicated}"}) -> (tensor<32x32xsi32, #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <1x1>, memref<1x1x!tt.tile<32x32, si32>, #ttnn.buffer_type<dram>>, <interleaved>>> {jax.result_info = ""}) {
    %0 = "ttnn.typecast"(%arg0) <{dtype = #tt.supportedDataTypes<si32>}> : (tensor<32x32xui16, #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <1x1>, memref<1x1x!tt.tile<32x32, u16>, #ttnn.buffer_type<dram>>, <interleaved>>>) -> tensor<32x32xsi32, #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <1x1>, memref<1x1x!tt.tile<32x32, si32>, #ttnn.buffer_type<dram>>, <interleaved>>>
    return %0 : tensor<32x32xsi32, #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <1x1>, memref<1x1x!tt.tile<32x32, si32>, #ttnn.buffer_type<dram>>, <interleaved>>>
  }
}
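
For what it's worth, the only functional difference between the two TTNN lowerings is the typecast target dtype (u32 before the uplift, si32 after). Every uint16 value is representable in both, so the dtype change by itself should not change the numeric result; a quick host-side check of that claim (illustrative, not part of the issue):

```python
import numpy as np

x = np.arange(2**16, dtype=np.uint16)   # every possible uint16 value
as_u32 = x.astype(np.uint32)            # pre-uplift typecast target
as_i32 = x.astype(np.int32)             # post-uplift typecast target

# uint16 fits in both uint32 and int32, so the two casts agree exactly.
assert np.array_equal(as_u32, as_i32.astype(np.uint32))
```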

sgligorijevicTT added a commit that referenced this issue Mar 3, 2025
This PR uplifts tt-mlir to the latest version and adds support for int32
buffers.
It also skips a couple of dtype conversion tests.
`test_convert[int64-uint16]` was broken from the start and was passing
erroneously before. `test_convert[int32-uint16]` appears to be a real
regression; we will have to investigate what is going on later.

Related issues: #293 #294
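
The skips mentioned above would typically be expressed with pytest marks along these lines (illustrative sketch only; the parametrization order, dtype spelling, and test body in the real tt-xla test file may differ):

```python
import numpy as np
import pytest

@pytest.mark.parametrize(
    "from_dtype, to_dtype",
    [
        pytest.param(np.uint16, np.int64,
                     marks=pytest.mark.skip(reason="broken from the start, passed erroneously (#293)")),
        pytest.param(np.uint16, np.int32,
                     marks=pytest.mark.skip(reason="atol regression after tt-mlir uplift (#293)")),
        (np.uint16, np.uint32),  # example of a case left enabled
    ],
)
def test_convert(from_dtype, to_dtype):
    ...  # body elided; the real test compares device output against a host-side cast
```
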
@mrakitaTT (Contributor) commented:

The graphs seem fine, so I suspect the problem is either in the runtime (metal) or in PJRT buffer creation from the si32 tensor. I would first verify with ttrt that the runtime creates a tensor with the expected values.
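
One cheap way to separate the two hypotheses before reaching for ttrt (a sketch, assuming the tt PJRT plugin is registered as the JAX backend) is to round-trip an int32 tensor through device memory with no compute at all:

```python
import jax
import numpy as np

host = np.arange(32 * 32, dtype=np.int32).reshape(32, 32)
device_buf = jax.device_put(host)    # creates the device buffer through PJRT
roundtrip = np.asarray(device_buf)   # copies it back to the host

# If this fails, si32 buffer creation/readback is the culprit rather than the typecast kernel.
np.testing.assert_array_equal(roundtrip, host)
```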
