Fix training of pipeline based peft's lora model #5477

Merged (9 commits, Oct 29, 2024)
3 changes: 2 additions & 1 deletion deepspeed/runtime/pipe/engine.py
@@ -287,7 +287,8 @@ def _exec_reduce_tied_grads(self):
         weight_group_list = self.module.get_tied_weights_and_groups()
         for weight, group in weight_group_list:
             grad = weight._hp_grad if self.using_bf16_optimizer else weight.grad
-            dist.all_reduce(grad, group=group)
+            if grad is not None:
+                dist.all_reduce(grad, group=group)

     def _exec_reduce_grads(self):
         self._force_grad_boundary = True
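The motivation for the guard can be sketched outside DeepSpeed. With PEFT-style LoRA, the tied base weights are frozen (`requires_grad=False`), so after `backward()` their `.grad` attribute stays `None`, and passing `None` to `dist.all_reduce` raises an error. This is a minimal standalone illustration, not code from the PR:

```python
# Sketch: frozen (LoRA-style base) parameters never receive gradients,
# so their .grad is None after backward() and must be skipped before
# any collective like dist.all_reduce.
import torch

weight = torch.nn.Parameter(torch.ones(4), requires_grad=False)  # frozen base weight
adapter = torch.nn.Parameter(torch.ones(4))                      # trainable LoRA-style weight

loss = (adapter * weight).sum()
loss.backward()

print(weight.grad)    # None: all_reduce(weight.grad, ...) would fail
print(adapter.grad)   # tensor([1., 1., 1., 1.])

# The PR's fix amounts to this pattern:
for p in (weight, adapter):
    if p.grad is not None:
        pass  # safe to all_reduce p.grad here
```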