diff --git a/models/demos/lenet/demo/demo.py b/models/demos/lenet/demo/demo.py
index 6008d09d9d4..3c9adf6cbef 100644
--- a/models/demos/lenet/demo/demo.py
+++ b/models/demos/lenet/demo/demo.py
@@ -31,7 +31,7 @@ def run_demo_dataset(device, batch_size, iterations, model_location_generator, r
     for iters in range(iterations):
         x = test_input.permute(0, 2, 3, 1)
         x = ttnn.from_torch(x, dtype=ttnn.bfloat16)
-        tt_output = tt_lenet.lenet(x, batch_size, device, parameters)
+        tt_output = tt_lenet.lenet(x, device, parameters)
         tt_output = ttnn.to_torch(tt_output)
         _, torch_predicted = torch.max(torch_output.data, -1)
         _, ttnn_predicted = torch.max(tt_output.data, -1)
diff --git a/models/demos/lenet/tests/test_perf_lenet.py b/models/demos/lenet/tests/test_perf_lenet.py
index d128f1f2361..e1f56080819 100644
--- a/models/demos/lenet/tests/test_perf_lenet.py
+++ b/models/demos/lenet/tests/test_perf_lenet.py
@@ -24,11 +24,11 @@ def get_expected_times(tt_lenet):
     if is_grayskull():
         return {
-            tt_lenet: (7.2, 0.05),
+            tt_lenet: (7.62, 0.05),
         }[tt_lenet]
     elif is_wormhole_b0():
         return {
-            tt_lenet: (10.1557, 0.045),
+            tt_lenet: (10.29, 0.047),
         }[tt_lenet]
@@ -64,7 +64,6 @@ def test_perf_lenet(device, batch_size, tt_lenet, model_location_generator, rese
         ttnn_output = tt_lenet.lenet(
             device=device,
             input_tensor=x,
-            batch_size=batch_size,
             parameters=parameters,
         )
         end = time.time()
@@ -106,9 +105,9 @@ def test_perf_device_bare_metal(batch_size, reset_seeds):
     num_iterations = 1
     margin = 0.03
     if is_grayskull():
-        expected_perf = 110955.849
+        expected_perf = 83102.20
     elif is_wormhole_b0():
-        expected_perf = 60971.775
+        expected_perf = 46313.985
 
     command = f"pytest tests/ttnn/integration_tests/lenet/test_lenet.py"
     cols = ["DEVICE FW", "DEVICE KERNEL", "DEVICE BRISC KERNEL"]
diff --git a/models/demos/lenet/tt/tt_lenet.py b/models/demos/lenet/tt/tt_lenet.py
index 1a5ebd80def..4ce479d4dea 100644
--- a/models/demos/lenet/tt/tt_lenet.py
+++ b/models/demos/lenet/tt/tt_lenet.py
@@ -12,19 +12,23 @@ def conv(device, input_tensor, batch_size, parameters):
     conv_config = ttnn.Conv2dConfig(
         dtype=ttnn.bfloat16,
         weights_dtype=ttnn.bfloat16,
-        math_fidelity=ttnn.MathFidelity.LoFi,
         activation="relu",
         shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED,
-        math_approx_mode_enabled=True,
-        fp32_dest_acc_enabled=False,
-        packer_l1_accum_enabled=False,
         input_channels_alignment=32,
         transpose_shards=False,
         reshard_if_not_optimal=True,
         deallocate_activation=True,
         reallocate_halo_output=True,
     )
-    [x, out_height, out_width, weights_device, bias_device] = ttnn.conv2d(
-        input_tensor=input_tensor,
+    compute_config = ttnn.init_device_compute_kernel_config(
+        device.arch(),
+        math_fidelity=ttnn.MathFidelity.LoFi,
+        math_approx_mode=True,
+        fp32_dest_acc_en=False,
+        packer_l1_acc=False,
+    )
+    x = ttnn.to_layout(input_tensor, layout=ttnn.ROW_MAJOR_LAYOUT)
+    x, [out_height, out_width] = ttnn.conv2d(
+        input_tensor=x,
         weight_tensor=weight,
         in_channels=input_tensor.shape[3],
@@ -38,14 +42,18 @@ def conv(device, input_tensor, batch_size, parameters):
         input_height=input_tensor.shape[1],
         input_width=input_tensor.shape[2],
         conv_config=conv_config,
+        compute_config=compute_config,
         conv_op_cache={},
         groups=1,
+        return_output_dim=True,
+        return_weights_and_bias=False,
     )
-    return x, out_height, out_width
+    return x, [out_height, out_width]
 
 
-def lenet(input_tensor, batch_size, device, parameters):
-    conv_1, out_height, out_width = conv(device, input_tensor, batch_size, parameters.layer1)
+def lenet(input_tensor, device, parameters):
+    batch_size = input_tensor.shape[0]
+    conv_1, [out_height, out_width] = conv(device, input_tensor, batch_size, parameters.layer1)
     conv_1 = ttnn.sharded_to_interleaved(conv_1, ttnn.L1_MEMORY_CONFIG)
     conv_1 = ttnn.to_layout(conv_1, layout=ttnn.ROW_MAJOR_LAYOUT)
     conv_1 = ttnn.pad(conv_1, [(0, 10)], value=0.0)
@@ -64,7 +72,7 @@ def lenet(input_tensor, batch_size, device, parameters):
     maxpool_1 = ttnn.sharded_to_interleaved(maxpool_1, ttnn.L1_MEMORY_CONFIG)
     maxpool_1 = ttnn.reshape(maxpool_1, (batch_size, 14, 14, maxpool_1.shape[3]))
 
-    conv_2, out_height, out_width = conv(device, maxpool_1, batch_size, parameters.layer2)
+    conv_2, [out_height, out_width] = conv(device, maxpool_1, batch_size, parameters.layer2)
     conv_2 = ttnn.to_layout(conv_2, layout=ttnn.ROW_MAJOR_LAYOUT)
 
     maxpool_2 = ttnn.max_pool2d(
diff --git a/tests/ttnn/integration_tests/lenet/test_lenet.py b/tests/ttnn/integration_tests/lenet/test_lenet.py
index 8312ab6997a..ace93e19dbf 100644
--- a/tests/ttnn/integration_tests/lenet/test_lenet.py
+++ b/tests/ttnn/integration_tests/lenet/test_lenet.py
@@ -34,7 +34,7 @@ def test_lenet(device, batch_size, model_location_generator, reset_seeds):
     x = ttnn.from_torch(
         x, dtype=ttnn.bfloat16, device=device, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG
     )
-    tt_output = tt_lenet.lenet(x, batch_size, device, parameters)
+    tt_output = tt_lenet.lenet(x, device, parameters)
    tt_output = ttnn.to_torch(tt_output)
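
Usage note (not part of the diff): after this change, callers drop the `batch_size` argument and `lenet` derives it from `input_tensor.shape[0]`. Below is a minimal sketch of the updated call pattern, mirroring the demo's preprocessing; `parameters` is assumed to come from the demo's existing model-preprocessing step and is left as a placeholder here.

```python
# Sketch only: `parameters` stands in for the preprocessed LeNet weights
# produced by the existing setup in models/demos/lenet/demo/demo.py.
import torch
import ttnn
from models.demos.lenet.tt import tt_lenet

device = ttnn.open_device(device_id=0)
parameters = ...  # placeholder: demo's preprocessed model parameters

torch_input = torch.randn(1, 1, 32, 32)  # NCHW; LeNet consumes 32x32 inputs
x = torch_input.permute(0, 2, 3, 1)      # convert to NHWC, as in the demo
x = ttnn.from_torch(x, dtype=ttnn.bfloat16)

# batch_size is no longer passed; lenet() reads it from input_tensor.shape[0]
tt_output = tt_lenet.lenet(x, device, parameters)
result = ttnn.to_torch(tt_output)

ttnn.close_device(device)
```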