From 1d255650b7acb61ae839f10bdd44fa406469e623 Mon Sep 17 00:00:00 2001 From: Liqiang Lu Date: Fri, 7 Feb 2025 15:39:15 -0800 Subject: [PATCH 1/4] remove redundant cast --- tests/cpp/test_persistent_buffer.cpp | 44 +++++++++++++--------------- 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/tests/cpp/test_persistent_buffer.cpp b/tests/cpp/test_persistent_buffer.cpp index a08650d712f..97c46626109 100644 --- a/tests/cpp/test_persistent_buffer.cpp +++ b/tests/cpp/test_persistent_buffer.cpp @@ -20,7 +20,7 @@ namespace nvfuser { using testing::Contains; -using testing::UnorderedElementsAre; +using testing::ElementsAre; using PersistentBufferTest = NVFuserTest; TEST_F(PersistentBufferTest, FusionPersistentBufferCalculation1_CUDA) { @@ -1472,18 +1472,23 @@ TEST_P(LayerNormSharedMemoryTest, FusionLayerNormSharedMemoryBuffer_CUDA) { constexpr int64_t dim0 = 2048; std::vector input_shape{dim0, hidden_size}; std::vector norm_shape{hidden_size}; - auto input_half = makeContigTensor(2, dtype); - auto weight_half = makeContigTensor(1, dtype); - auto bias_half = makeContigTensor(1, dtype); - fusion.addInput(input_half); - fusion.addInput(weight_half); - fusion.addInput(bias_half); - auto input = castOp(DataType::Float, input_half); - auto weight = castOp(DataType::Float, weight_half); - auto bias = castOp(DataType::Float, bias_half); + + auto input = makeContigTensor(2, dtype); + auto weight = makeContigTensor(1, dtype); + auto bias = makeContigTensor(1, dtype); + fusion.addInput(input); + fusion.addInput(weight); + fusion.addInput(bias); + if (dtype == DataType::Half) { + input = castOp(DataType::Float, input); + weight = castOp(DataType::Float, weight); + bias = castOp(DataType::Float, bias); + } auto result = layer_norm(input, norm_shape, weight, bias, eps_ptr); - auto result_output = castOp(dtype, result.output); - fusion.addOutput(result_output); + if (dtype == DataType::Half) { + result.output = castOp(DataType::Half, result.output); + } + fusion.addOutput(result.output); fusion.addOutput(result.mean); fusion.addOutput(result.invstd); @@ -1534,18 +1539,9 @@ TEST_P(LayerNormSharedMemoryTest, FusionLayerNormSharedMemoryBuffer_CUDA) { auto cg_outputs = executor_cache.runFusionWithInputs(aten_inputs); auto runtime = executor_cache.getMostRecentKernelRuntime(); if (has_enough_regs_smem) { - // For dtype float, no op scheduler is also used. - if (dtype == DataType::Float) { - EXPECT_THAT( - runtime->fusionSegments()->groups(), - UnorderedElementsAre( - HeuristicIs(SchedulerType::NoOp), - HeuristicIs(SchedulerType::InnerPersistent))); - } else { - EXPECT_THAT( - runtime->fusionSegments()->groups(), - UnorderedElementsAre(HeuristicIs(SchedulerType::InnerPersistent))); - } + EXPECT_THAT( + runtime->fusionSegments()->groups(), + ElementsAre(HeuristicIs(SchedulerType::InnerPersistent))); Fusion* scheduled_fusion = runtime->executors() .back() ->as() From b80bbab0e6f7f606cc63950533e9d2618a6501c0 Mon Sep 17 00:00:00 2001 From: Liqiang Lu <116412316+liqiangxl@users.noreply.github.com> Date: Sat, 8 Feb 2025 19:32:31 -0500 Subject: [PATCH 2/4] Update tests/cpp/test_persistent_buffer.cpp Co-authored-by: Jingyue Wu --- tests/cpp/test_persistent_buffer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/cpp/test_persistent_buffer.cpp b/tests/cpp/test_persistent_buffer.cpp index 97c46626109..f89ee9b820a 100644 --- a/tests/cpp/test_persistent_buffer.cpp +++ b/tests/cpp/test_persistent_buffer.cpp @@ -1541,7 +1541,7 @@ TEST_P(LayerNormSharedMemoryTest, FusionLayerNormSharedMemoryBuffer_CUDA) { if (has_enough_regs_smem) { EXPECT_THAT( runtime->fusionSegments()->groups(), - ElementsAre(HeuristicIs(SchedulerType::InnerPersistent))); + UnorderedElementsAre(HeuristicIs(SchedulerType::InnerPersistent))); Fusion* scheduled_fusion = runtime->executors() .back() ->as() From b073f4cfe012b41a394313c1953809dadf387c04 Mon Sep 17 00:00:00 2001 From: Liqiang Lu Date: Sat, 8 Feb 2025 16:48:50 -0800 Subject: [PATCH 3/4] unorder --- tests/cpp/test_persistent_buffer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/cpp/test_persistent_buffer.cpp b/tests/cpp/test_persistent_buffer.cpp index f89ee9b820a..d9ac44a67b3 100644 --- a/tests/cpp/test_persistent_buffer.cpp +++ b/tests/cpp/test_persistent_buffer.cpp @@ -20,7 +20,7 @@ namespace nvfuser { using testing::Contains; -using testing::ElementsAre; +using testing::UnorderedElementsAre; using PersistentBufferTest = NVFuserTest; TEST_F(PersistentBufferTest, FusionPersistentBufferCalculation1_CUDA) { From b909282b00f3276015e6f3e50cda608da5a02fac Mon Sep 17 00:00:00 2001 From: Liqiang Lu Date: Sat, 8 Feb 2025 16:55:47 -0800 Subject: [PATCH 4/4] maybecast --- tests/cpp/test_persistent_buffer.cpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/tests/cpp/test_persistent_buffer.cpp b/tests/cpp/test_persistent_buffer.cpp index d9ac44a67b3..0c44b3957a7 100644 --- a/tests/cpp/test_persistent_buffer.cpp +++ b/tests/cpp/test_persistent_buffer.cpp @@ -1479,15 +1479,11 @@ TEST_P(LayerNormSharedMemoryTest, FusionLayerNormSharedMemoryBuffer_CUDA) { fusion.addInput(input); fusion.addInput(weight); fusion.addInput(bias); - if (dtype == DataType::Half) { - input = castOp(DataType::Float, input); - weight = castOp(DataType::Float, weight); - bias = castOp(DataType::Float, bias); - } + input = maybeCastOp(DataType::Float, input); + weight = maybeCastOp(DataType::Float, weight); + bias = maybeCastOp(DataType::Float, bias); auto result = layer_norm(input, norm_shape, weight, bias, eps_ptr); - if (dtype == DataType::Half) { - result.output = castOp(DataType::Half, result.output); - } + result.output = maybeCastOp(dtype, result.output); fusion.addOutput(result.output); fusion.addOutput(result.mean); fusion.addOutput(result.invstd);