#16415: fix moreh_adam when amsgrad is disabled (the optional max_exp_avg_sq output may be absent)

tenstorrent · Jan 14, 2025 · efd7356
1 parent 4cfb561
commit efd7356
Show file tree

Hide file tree

Showing 3 changed files with 5 additions and 3 deletions.
diff --git a/tests/ttnn/unit_tests/operations/test_moreh_adam.py b/tests/ttnn/unit_tests/operations/test_moreh_adam.py
@@ -50,7 +50,7 @@ def forward(self, x):
     dev_param_out = create_tt_tensor(model.weight, device, dtype=dtype)
     dev_exp_avg_out = create_tt_tensor(cpu_exp_avg, device, dtype=dtype)
     dev_exp_avg_sq_out = create_tt_tensor(cpu_exp_avg_sq, device, dtype=dtype)
-    dev_max_exp_avg_sq_out = create_tt_tensor(cpu_max_exp_avg_sq, device, dtype=dtype)
+    dev_max_exp_avg_sq_out = create_tt_tensor(cpu_max_exp_avg_sq, device, dtype=dtype) if amsgrad else None
 
     criterion = nn.L1Loss()
     optimizer = optim.Adam({model.weight}, lr=lr, betas=betas, eps=eps, weight_decay=weight_decay, amsgrad=amsgrad)

diff --git a/ttnn/cpp/ttnn/operations/moreh/moreh_adam/device/moreh_adam_device_operation.cpp b/ttnn/cpp/ttnn/operations/moreh/moreh_adam/device/moreh_adam_device_operation.cpp
@@ -101,6 +101,8 @@ MorehAdamOperation::tensor_return_value_t MorehAdamOperation::create_output_tens
             ret.push_back(tensor_args.output_tensors.at(idx).value());
         } else if (output_specs[idx].has_value()) {
             ret.push_back(create_device_tensor(*output_specs[idx], device));
+        } else {
+            ret.push_back(std::nullopt);
         }
     }
 

diff --git a/ttnn/cpp/ttnn/operations/moreh/moreh_adam/device/moreh_adam_program_factory.cpp b/ttnn/cpp/ttnn/operations/moreh/moreh_adam/device/moreh_adam_program_factory.cpp
@@ -104,7 +104,7 @@ MorehAdamOperation::ProgramFactory::cached_program_t MorehAdamOperation::Program
         static_cast<uint32_t>(is_dram(param_out)),
         static_cast<uint32_t>(is_dram(exp_avg_out)),
         static_cast<uint32_t>(is_dram(exp_avg_sq_out)),
-        static_cast<uint32_t>(is_dram(max_exp_avg_sq_out.value()))};
+        static_cast<uint32_t>(max_exp_avg_sq_out.has_value() ? is_dram(max_exp_avg_sq_out.value()) : false)};
 
     const auto reader_kernel_file =
         "ttnn/cpp/ttnn/operations/moreh/moreh_adam/device/kernels/"
@@ -272,7 +272,7 @@ void MorehAdamOperation::ProgramFactory::override_runtime_arguments(
     auto param_out_buffer = tensor_return_value.at(0)->buffer();
     auto exp_avg_out_buffer = tensor_return_value.at(1)->buffer();
     auto exp_avg_sq_out_buffer = tensor_return_value.at(2)->buffer();
-    auto max_exp_avg_sq_out_buffer = tensor_return_value.at(3)->buffer();
+    auto max_exp_avg_sq_out_buffer = operation_attributes.amsgrad ? tensor_return_value.at(3)->buffer() : nullptr;
 
     auto& core_group_1 = cached_program.shared_variables.core_group_1;
     auto& core_group_2 = cached_program.shared_variables.core_group_2;