Skip to content

Commit

Permalink
#13647: use logical volume and do not short-circuit for mean with no dim (#15346)
Browse files Browse the repository at this point in the history

### Ticket
Link to Github Issue #13647

### Problem description
We used the padded shape instead of the logical shape to calculate the mean, and
short-circuited with a return value instead of continuing on to the reshape that
keeps the dimensions.

### What's changed
- use logical shape
- do not short circuit return, and allow reshape to occur further down
in the function code if needed for keepdim

### Checklist
- [x] Post commit CI passes
https://github.com/tenstorrent/tt-metal/actions/runs/11962594709
- [ ] Blackhole Post commit (if applicable) N/A
- [ ] Model regression CI testing passes (if applicable) N/A
- [ ] Device performance regression CI testing passes (if applicable)
N/A
- [x] New/Existing tests provide coverage for changes
  • Loading branch information
bbradelTT authored Nov 22, 2024
1 parent 91b2a45 commit fc129ac
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 4 deletions.
25 changes: 23 additions & 2 deletions tests/ttnn/unit_tests/operations/test_reduction_mean.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
import torch

import ttnn
from tests.ttnn.utils_for_testing import assert_with_pcc
from models.utility_functions import torch_random
from tests.ttnn.utils_for_testing import assert_with_pcc, construct_pcc_assert_message
from models.utility_functions import torch_random, comp_allclose


@pytest.mark.parametrize("batch_size", [1, 16, 1, 16])
Expand All @@ -26,3 +26,24 @@ def test_mean(device, batch_size, h, w, dim):
output_tensor = ttnn.mean(input_tensor, dim=dim)
output_tensor = ttnn.to_torch(output_tensor)
assert_with_pcc(torch_output_tensor, output_tensor)


@pytest.mark.parametrize("batch_size", [(1,), (4,), (64, 4), None])
@pytest.mark.parametrize("h", [1, 32, 64, 41, 37])
@pytest.mark.parametrize("w", [32, 64, 31, 63])
def test_mean_without_dim(device, batch_size, h, w):
    """Check ttnn.mean over all dimensions (no dim) against torch.mean with keepdim=True."""
    torch.manual_seed(0)

    # Leading batch dims are optional; without them the input is a plain (h, w) matrix.
    shape = (h, w) if not batch_size else (*batch_size, h, w)

    torch_input_tensor = torch_random(shape, -1, 1, dtype=torch.bfloat16)
    torch_output_tensor = torch.mean(torch_input_tensor, None, True)

    input_tensor = ttnn.from_torch(torch_input_tensor, layout=ttnn.TILE_LAYOUT, device=device)

    output_tensor = ttnn.to_torch(ttnn.mean(input_tensor, keepdim=True))

    # The result is a single scalar, so PCC is meaningless; compare with allclose instead.
    close_passed, close_message = comp_allclose(torch_output_tensor, output_tensor, rtol=0.001, atol=0.001)
    if not close_passed:
        print(f"Found mismatch: torch_output_tensor {torch_output_tensor}\n output_tensor {output_tensor}")
    assert close_passed, construct_pcc_assert_message(close_message, torch_output_tensor, output_tensor)
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,8 @@ static Tensor reduce_impl(
for (int rank = input_tensor.get_legacy_shape().rank() - 1; rank >= 0; rank--) {
output_tensor = reduce_impl<ReduceType::Sum>(output_tensor, rank, true, memory_config, compute_kernel_config, scalar, false);
}
float inv_volume = 1.0f/input_tensor.volume();
return ttnn::mul_sfpu(inv_volume, output_tensor, memory_config);
float inv_volume = 1.0f/input_tensor.get_logical_volume();
output_tensor = ttnn::mul_sfpu(inv_volume, output_tensor, memory_config);
} else {
TT_THROW("Unsupported reduction operation");
}
Expand Down

0 comments on commit fc129ac

Please sign in to comment.