#13404: Update Conv2d for the Lenet model

tenstorrent · Dec 10, 2024 · commit a68a993 · 1 parent 030e5f8

Showing 4 changed files with 23 additions and 16 deletions.
diff --git a/models/demos/lenet/demo/demo.py b/models/demos/lenet/demo/demo.py
@@ -31,7 +31,7 @@ def run_demo_dataset(device, batch_size, iterations, model_location_generator, r
     for iters in range(iterations):
         x = test_input.permute(0, 2, 3, 1)
         x = ttnn.from_torch(x, dtype=ttnn.bfloat16)
-        tt_output = tt_lenet.lenet(x, batch_size, device, parameters)
+        tt_output = tt_lenet.lenet(x, device, parameters)
         tt_output = ttnn.to_torch(tt_output)
         _, torch_predicted = torch.max(torch_output.data, -1)
         _, ttnn_predicted = torch.max(tt_output.data, -1)

diff --git a/models/demos/lenet/tests/test_perf_lenet.py b/models/demos/lenet/tests/test_perf_lenet.py
@@ -24,11 +24,11 @@
 def get_expected_times(tt_lenet):
     if is_grayskull():
         return {
-            tt_lenet: (7.2, 0.05),
+            tt_lenet: (7.62, 0.05),
         }[tt_lenet]
     elif is_wormhole_b0():
         return {
-            tt_lenet: (10.1557, 0.045),
+            tt_lenet: (10.29, 0.047),
         }[tt_lenet]
 
 
@@ -64,7 +64,6 @@ def test_perf_lenet(device, batch_size, tt_lenet, model_location_generator, rese
         ttnn_output = tt_lenet.lenet(
             device=device,
             input_tensor=x,
-            batch_size=batch_size,
             parameters=parameters,
         )
         end = time.time()
@@ -106,9 +105,9 @@ def test_perf_device_bare_metal(batch_size, reset_seeds):
     num_iterations = 1
     margin = 0.03
     if is_grayskull():
-        expected_perf = 110955.849
+        expected_perf = 83102.20
     elif is_wormhole_b0():
-        expected_perf = 60971.775
+        expected_perf = 46313.985
 
     command = f"pytest tests/ttnn/integration_tests/lenet/test_lenet.py"
     cols = ["DEVICE FW", "DEVICE KERNEL", "DEVICE BRISC KERNEL"]

diff --git a/models/demos/lenet/tt/tt_lenet.py b/models/demos/lenet/tt/tt_lenet.py
@@ -12,19 +12,23 @@ def conv(device, input_tensor, batch_size, parameters):
     conv_config = ttnn.Conv2dConfig(
         dtype=ttnn.bfloat16,
         weights_dtype=ttnn.bfloat16,
-        math_fidelity=ttnn.MathFidelity.LoFi,
         activation="relu",
         shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED,
-        math_approx_mode_enabled=True,
-        fp32_dest_acc_enabled=False,
-        packer_l1_accum_enabled=False,
         input_channels_alignment=32,
         transpose_shards=False,
         reshard_if_not_optimal=True,
         deallocate_activation=True,
         reallocate_halo_output=True,
     )
-    [x, out_height, out_width, weights_device, bias_device] = ttnn.conv2d(
+    compute_config = ttnn.init_device_compute_kernel_config(
+        device.arch(),
+        math_fidelity=ttnn.MathFidelity.LoFi,
+        math_approx_mode=True,
+        fp32_dest_acc_en=False,
+        packer_l1_acc=False,
+    )
+    x = ttnn.to_layout(input_tensor, layout=ttnn.ROW_MAJOR_LAYOUT)
+    x, [out_height, out_width] = ttnn.conv2d(
         input_tensor=input_tensor,
         weight_tensor=weight,
         in_channels=input_tensor.shape[3],
@@ -38,14 +42,18 @@ def conv(device, input_tensor, batch_size, parameters):
         input_height=input_tensor.shape[1],
         input_width=input_tensor.shape[2],
         conv_config=conv_config,
+        compute_config=compute_config,
         conv_op_cache={},
         groups=1,
+        return_output_dim=True,
+        return_weights_and_bias=False,
     )
-    return x, out_height, out_width
+    return x, [out_height, out_width]
 
 
-def lenet(input_tensor, batch_size, device, parameters):
-    conv_1, out_height, out_width = conv(device, input_tensor, batch_size, parameters.layer1)
+def lenet(input_tensor, device, parameters):
+    batch_size = input_tensor.shape[0]
+    conv_1, [out_height, out_width] = conv(device, input_tensor, batch_size, parameters.layer1)
     conv_1 = ttnn.sharded_to_interleaved(conv_1, ttnn.L1_MEMORY_CONFIG)
     conv_1 = ttnn.to_layout(conv_1, layout=ttnn.ROW_MAJOR_LAYOUT)
     conv_1 = ttnn.pad(conv_1, [(0, 10)], value=0.0)
@@ -64,7 +72,7 @@ def lenet(input_tensor, batch_size, device, parameters):
 
     maxpool_1 = ttnn.sharded_to_interleaved(maxpool_1, ttnn.L1_MEMORY_CONFIG)
     maxpool_1 = ttnn.reshape(maxpool_1, (batch_size, 14, 14, maxpool_1.shape[3]))
-    conv_2, out_height, out_width = conv(device, maxpool_1, batch_size, parameters.layer2)
+    conv_2, [out_height, out_width] = conv(device, maxpool_1, batch_size, parameters.layer2)
     conv_2 = ttnn.to_layout(conv_2, layout=ttnn.ROW_MAJOR_LAYOUT)
 
     maxpool_2 = ttnn.max_pool2d(

diff --git a/tests/ttnn/integration_tests/lenet/test_lenet.py b/tests/ttnn/integration_tests/lenet/test_lenet.py
@@ -34,7 +34,7 @@ def test_lenet(device, batch_size, model_location_generator, reset_seeds):
     x = ttnn.from_torch(
         x, dtype=ttnn.bfloat16, device=device, layout=ttnn.TILE_LAYOUT, memory_config=ttnn.L1_MEMORY_CONFIG
     )
-    tt_output = tt_lenet.lenet(x, batch_size, device, parameters)
+    tt_output = tt_lenet.lenet(x, device, parameters)
 
     tt_output = ttnn.to_torch(tt_output)