diff --git a/tests/test_bco_trainer.py b/tests/test_bco_trainer.py
index d555d62aa6..2a2a81b4b4 100644
--- a/tests/test_bco_trainer.py
+++ b/tests/test_bco_trainer.py
@@ -98,11 +98,10 @@ def test_bco_trainer(self, name, pre_compute, eval_dataset, config_name):
 
             self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
 
-            # check the params have changed
+            # Check that the parameters have changed
             for n, param in previous_trainable_params.items():
                 new_param = trainer.model.get_parameter(n)
-                # check the params have changed - ignore 0 biases
-                if param.sum() != 0:
+                if param.sum() != 0:  # ignore 0 biases
                     self.assertFalse(torch.equal(param.cpu(), new_param.cpu()))
 
     @require_sklearn
@@ -220,11 +219,10 @@ def test_bco_trainer_without_providing_ref_model(self):
 
             self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
 
-            # check the params have changed
+            # Check that the parameters have changed
             for n, param in previous_trainable_params.items():
                 new_param = trainer.model.get_parameter(n)
-                # check the params have changed - ignore 0 biases
-                if param.sum() != 0:
+                if param.sum() != 0:  # ignore 0 biases
                     self.assertFalse(torch.equal(param.cpu(), new_param.cpu()))
 
     @require_sklearn
@@ -268,11 +266,10 @@ def embed_prompt(input_ids, attention_mask, model):
 
             self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
 
-            # check the params have changed
+            # Check that the parameters have changed
             for n, param in previous_trainable_params.items():
                 new_param = trainer.model.get_parameter(n)
-                # check the params have changed - ignore 0 biases
-                if param.sum() != 0:
+                if param.sum() != 0:  # ignore 0 biases
                     self.assertFalse(torch.equal(param.cpu(), new_param.cpu()))
 
     @require_sklearn
@@ -318,12 +315,11 @@ def test_bco_trainer_without_providing_ref_model_with_lora(self):
 
             self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
 
-            # check the params have changed
+            # Check that the parameters have changed
             for n, param in previous_trainable_params.items():
                 if "lora" in n:
                     new_param = trainer.model.get_parameter(n)
-                    # check the params have changed - ignore 0 biases
-                    if param.sum() != 0:
+                    if param.sum() != 0:  # ignore 0 biases
                         self.assertFalse(torch.equal(param.cpu(), new_param.cpu()))
 
     @require_sklearn
diff --git a/tests/test_cpo_trainer.py b/tests/test_cpo_trainer.py
index 95ecee723a..1656bdf932 100644
--- a/tests/test_cpo_trainer.py
+++ b/tests/test_cpo_trainer.py
@@ -89,11 +89,10 @@ def test_cpo_trainer(self, name, loss_type, config_name):
 
             self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
 
-            # check the params have changed
+            # Check that the parameters have changed
             for n, param in previous_trainable_params.items():
                 new_param = trainer.model.get_parameter(n)
-                # check the params have changed - ignore 0 biases
-                if param.sum() != 0:
+                if param.sum() != 0:  # ignore 0 biases
                     self.assertFalse(torch.equal(param, new_param))
 
     @parameterized.expand(
@@ -147,10 +146,9 @@ def test_cpo_trainer_with_lora(self, config_name):
 
             self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
 
-            # check the params have changed
+            # Check that the parameters have changed
             for n, param in previous_trainable_params.items():
                 if "lora" in n:
                     new_param = trainer.model.get_parameter(n)
-                    # check the params have changed - ignore 0 biases
-                    if param.sum() != 0:
+                    if param.sum() != 0:  # ignore 0 biases
                         self.assertFalse(torch.equal(param, new_param))
diff --git a/tests/test_dpo_trainer.py b/tests/test_dpo_trainer.py
index 3413a0884e..d2c259da42 100644
--- a/tests/test_dpo_trainer.py
+++ b/tests/test_dpo_trainer.py
@@ -236,11 +236,10 @@ def test_dpo_trainer(self, name, loss_type, pre_compute):
 
             self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
 
-            # check the params have changed
+            # Check that the parameters have changed
             for n, param in previous_trainable_params.items():
                 new_param = trainer.model.get_parameter(n)
-                # check the params have changed - ignore 0 biases
-                if param.sum() != 0:
+                if param.sum() != 0:  # ignore 0 biases
                     self.assertFalse(torch.allclose(param, new_param, rtol=1e-12, atol=1e-12))
 
     def test_dpo_trainer_with_weighting(self):
@@ -277,11 +276,10 @@ def test_dpo_trainer_with_weighting(self):
 
             self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
 
-            # check the params have changed
+            # Check that the parameters have changed
             for n, param in previous_trainable_params.items():
                 new_param = trainer.model.get_parameter(n)
-                # check the params have changed - ignore 0 biases
-                if param.sum() != 0:
+                if param.sum() != 0:  # ignore 0 biases
                     self.assertFalse(torch.allclose(param, new_param, rtol=1e-12, atol=1e-12))
 
     @parameterized.expand(
@@ -323,11 +321,10 @@ def test_dpo_trainer_without_providing_ref_model(self, rpo_alpha, _):
 
            self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
 
-            # check the params have changed
+            # Check that the parameters have changed
             for n, param in previous_trainable_params.items():
                 new_param = trainer.model.get_parameter(n)
-                # check the params have changed - ignore 0 biases
-                if param.sum() != 0:
+                if param.sum() != 0:  # ignore 0 biases
                     self.assertFalse(torch.equal(param, new_param))
 
     def test_dpo_trainer_with_ref_model_is_model(self):
@@ -377,11 +374,10 @@ def test_precompute_ref_batch_size(self):
 
             self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
 
-            # check the params have changed
+            # Check that the parameters have changed
             for n, param in previous_trainable_params.items():
                 new_param = trainer.model.get_parameter(n)
-                # check the params have changed - ignore 0 biases
-                if param.sum() != 0:
+                if param.sum() != 0:  # ignore 0 biases
                     self.assertFalse(torch.allclose(param, new_param, rtol=1e-12, atol=1e-12))
 
     @require_peft
@@ -428,12 +424,11 @@ def test_dpo_trainer_without_providing_ref_model_with_lora(self):
 
             self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
 
-            # check the params have changed
+            # Check that the parameters have changed
             for n, param in previous_trainable_params.items():
                 if "lora" in n:
                     new_param = trainer.model.get_parameter(n)
-                    # check the params have changed - ignore 0 biases
-                    if param.sum() != 0:
+                    if param.sum() != 0:  # ignore 0 biases
                         self.assertFalse(torch.equal(param, new_param))
 
     def test_dpo_trainer_padding_token_is_none(self):
@@ -537,11 +532,10 @@ def test_tr_dpo_trainer(self):
 
             self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
 
-            # check the params have changed
+            # Check that the parameters have changed
             for n, param in previous_trainable_params.items():
                 new_param = trainer.ref_model.get_parameter(n)
-                # check the ref model's params have changed - ignore 0 biases
-                if param.sum() != 0:
+                if param.sum() != 0:  # ignore 0 biases
                     self.assertFalse(torch.equal(param, new_param))
 
     @require_no_wandb
@@ -1190,11 +1184,10 @@ def test_padding_free(self):
 
             trainer.train()
 
-            # check the params have changed
+            # Check that the parameters have changed
             for n, param in previous_trainable_params.items():
                 new_param = trainer.model.get_parameter(n)
-                # check the params have changed - ignore 0 biases
-                if param.sum() != 0:
+                if param.sum() != 0:  # ignore 0 biases
                     self.assertFalse(torch.allclose(param, new_param, rtol=1e-12, atol=1e-12))
 
 
diff --git a/tests/test_kto_trainer.py b/tests/test_kto_trainer.py
index 06a5b9f2ce..523941c6e8 100644
--- a/tests/test_kto_trainer.py
+++ b/tests/test_kto_trainer.py
@@ -95,11 +95,10 @@ def test_kto_trainer(self, name, config_name, loss_type, pre_compute, eval_datas
 
             self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
 
-            # check the params have changed
+            # Check that the parameters have changed
             for n, param in previous_trainable_params.items():
                 new_param = trainer.model.get_parameter(n)
-                # check the params have changed - ignore 0 biases
-                if param.sum() != 0:
+                if param.sum() != 0:  # ignore 0 biases
                     self.assertFalse(torch.equal(param, new_param))
 
     def test_kto_trainer_with_ref_model_is_model(self):
@@ -238,11 +237,10 @@ def test_kto_trainer_without_providing_ref_model(self):
 
             self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
 
-            # check the params have changed
+            # Check that the parameters have changed
             for n, param in previous_trainable_params.items():
                 new_param = trainer.model.get_parameter(n)
-                # check the params have changed - ignore 0 biases
-                if param.sum() != 0:
+                if param.sum() != 0:  # ignore 0 biases
                     self.assertFalse(torch.equal(param, new_param))
 
     @require_peft
@@ -288,12 +286,11 @@ def test_kto_trainer_without_providing_ref_model_with_lora(self):
 
             self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
 
-            # check the params have changed
+            # Check that the parameters have changed
             for n, param in previous_trainable_params.items():
                 if "lora" in n:
                     new_param = trainer.model.get_parameter(n)
-                    # check the params have changed - ignore 0 biases
-                    if param.sum() != 0:
+                    if param.sum() != 0:  # ignore 0 biases
                         self.assertFalse(torch.equal(param, new_param))
 
     @require_no_wandb
diff --git a/tests/test_orpo_trainer.py b/tests/test_orpo_trainer.py
index fbb8d3fbe7..aed639656d 100644
--- a/tests/test_orpo_trainer.py
+++ b/tests/test_orpo_trainer.py
@@ -84,11 +84,10 @@ def test_orpo_trainer(self, name, config_name):
 
             self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
 
-            # check the params have changed
+            # Check that the parameters have changed
             for n, param in previous_trainable_params.items():
                 new_param = trainer.model.get_parameter(n)
-                # check the params have changed - ignore 0 biases
-                if param.sum() != 0:
+                if param.sum() != 0:  # ignore 0 biases
                     self.assertFalse(torch.equal(param, new_param))
 
     @parameterized.expand(
@@ -141,10 +140,9 @@ def test_orpo_trainer_with_lora(self, config_name):
 
             self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
 
-            # check the params have changed
+            # Check that the parameters have changed
             for n, param in previous_trainable_params.items():
                 if "lora" in n:
                     new_param = trainer.model.get_parameter(n)
-                    # check the params have changed - ignore 0 biases
-                    if param.sum() != 0:
+                    if param.sum() != 0:  # ignore 0 biases
                         self.assertFalse(torch.equal(param, new_param))
diff --git a/tests/test_prm_trainer.py b/tests/test_prm_trainer.py
index a4d8bbe3dc..dd8b3f8dcd 100644
--- a/tests/test_prm_trainer.py
+++ b/tests/test_prm_trainer.py
@@ -211,11 +211,10 @@ def test_train_full(self, train_on_last_step_only):
             trainer.train()
             self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
 
-            # check the params have changed
+            # Check that the parameters have changed
             for n, param in previous_trainable_params.items():
                 new_param = trainer.model.get_parameter(n)
-                # check the params have changed - ignore 0 biases
-                if param.sum() != 0:
+                if param.sum() != 0:  # ignore 0 biases
                     self.assertFalse(torch.allclose(param, new_param, rtol=1e-12, atol=1e-12))
 
     def test_train_full_pretokenized(self):
@@ -266,11 +265,10 @@ def test_train_full_pretokenized(self):
             trainer.train()
             self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
 
-            # check the params have changed
+            # Check that the parameters have changed
             for n, param in previous_trainable_params.items():
                 new_param = trainer.model.get_parameter(n)
-                # check the params have changed - ignore 0 biases
-                if param.sum() != 0:
+                if param.sum() != 0:  # ignore 0 biases
                     self.assertFalse(torch.allclose(param, new_param, rtol=1e-12, atol=1e-12))
 
     @require_peft
@@ -309,12 +307,12 @@ def test_train_lora(self):
 
             self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
 
-            # check the params have changed
+            # Check that the parameters have changed
             for n, param in previous_trainable_params.items():
                 new_param = trainer.model.get_parameter(n)
                 self.assertFalse(torch.allclose(param, new_param, atol=1e-12, rtol=1e-12))
 
-            # check the non trainable params have not changed
+            # Check that the non trainable parameters have not changed
             for n, param in previous_non_trainable_params.items():
                 new_param = trainer.model.get_parameter(n)
                 self.assertTrue(torch.allclose(param, new_param, atol=1e-12, rtol=1e-12))
diff --git a/tests/test_reward_trainer.py b/tests/test_reward_trainer.py
index c54cd2dd7c..51ea1183f7 100644
--- a/tests/test_reward_trainer.py
+++ b/tests/test_reward_trainer.py
@@ -70,11 +70,10 @@ def test_train_full(self):
             trainer.train()
             self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
 
-            # check the params have changed
+            # Check that the parameters have changed
             for n, param in previous_trainable_params.items():
                 new_param = trainer.model.get_parameter(n)
-                # check the params have changed - ignore 0 biases
-                if param.sum() != 0:
+                if param.sum() != 0:  # ignore 0 biases
                     self.assertFalse(torch.allclose(param, new_param, rtol=1e-12, atol=1e-12))
 
     def test_train_full_pretokenized(self):
@@ -90,11 +89,10 @@ def test_train_full_pretokenized(self):
             trainer.train()
             self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
 
-            # check the params have changed
+            # Check that the parameters have changed
             for n, param in previous_trainable_params.items():
                 new_param = trainer.model.get_parameter(n)
-                # check the params have changed - ignore 0 biases
-                if param.sum() != 0:
+                if param.sum() != 0:  # ignore 0 biases
                     self.assertFalse(torch.allclose(param, new_param, rtol=1e-12, atol=1e-12))
 
     @require_peft
@@ -133,12 +131,12 @@ def test_train_lora(self):
 
             self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
 
-            # check the params have changed
+            # Check that the parameters have changed
             for n, param in previous_trainable_params.items():
                 new_param = trainer.model.get_parameter(n)
                 self.assertFalse(torch.allclose(param, new_param, atol=1e-12, rtol=1e-12))
 
-            # check the non trainable params have not changed
+            # Check that the non trainable parameters have not changed
             for n, param in previous_non_trainable_params.items():
                 new_param = trainer.model.get_parameter(n)
                 self.assertTrue(torch.allclose(param, new_param, atol=1e-12, rtol=1e-12))
@@ -181,12 +179,12 @@ def test_train_lora_pretokenized(self):
 
             self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
 
-            # check the params have changed
+            # Check that the parameters have changed
             for n, param in previous_trainable_params.items():
                 new_param = trainer.model.get_parameter(n)
                 self.assertFalse(torch.allclose(param, new_param, atol=1e-12, rtol=1e-12))
 
-            # check the non trainable params have not changed
+            # Check that the non trainable parameters have not changed
             for n, param in previous_non_trainable_params.items():
                 new_param = trainer.model.get_parameter(n)
                 self.assertTrue(torch.allclose(param, new_param, atol=1e-12, rtol=1e-12))