diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 545815fe67..2bd74e5297 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,37 +1,10 @@ repos: - - repo: https://github.com/PyCQA/isort - rev: 5.12.0 + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.2.0 hooks: - - id: isort - args: - - --profile=black - - --skip-glob=wandb/**/* - - --thirdparty=wandb - - repo: https://github.com/myint/autoflake - rev: v1.4 - hooks: - - id: autoflake - args: - - -r - - --exclude=wandb,__init__.py - - --in-place - - --remove-unused-variables - - --remove-all-unused-imports - - repo: https://github.com/python/black - rev: 22.3.0 - hooks: - - id: black - args: - - --line-length=119 - - --target-version=py38 - - --exclude=wandb - - repo: https://github.com/pycqa/flake8 - rev: 6.0.0 - hooks: - - id: flake8 - args: - - --ignore=E203,E501,W503,E128 - - --max-line-length=119 + - id: ruff + args: [ --fix ] + - id: ruff-format # - repo: https://github.com/codespell-project/codespell # rev: v2.1.0 diff --git a/examples/hello_world.py b/examples/hello_world.py index 4ba1c9b890..b2beb20293 100644 --- a/examples/hello_world.py +++ b/examples/hello_world.py @@ -29,7 +29,7 @@ "pad_token_id": tokenizer.eos_token_id, "max_new_tokens": 20, } -response_tensor = ppo_trainer.generate([item for item in query_tensor], return_prompt=False, **generation_kwargs) +response_tensor = ppo_trainer.generate(list(query_tensor), return_prompt=False, **generation_kwargs) response_txt = tokenizer.decode(response_tensor[0]) # 5. define a reward for response diff --git a/examples/research_projects/stack_llama/scripts/rl_training.py b/examples/research_projects/stack_llama/scripts/rl_training.py index 225d6b810a..c8502c5e89 100644 --- a/examples/research_projects/stack_llama/scripts/rl_training.py +++ b/examples/research_projects/stack_llama/scripts/rl_training.py @@ -1,4 +1,3 @@ -# coding=utf-8 # Copyright 2022 The HuggingFace Inc. team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -163,7 +162,7 @@ def preprocess_function(examples): def collator(data): - return dict((key, [d[key] for d in data]) for key in data[0]) + return {key: [d[key] for d in data] for key in data[0]} # set seed before initializing value head for deterministic eval diff --git a/examples/research_projects/tools/calculator.py b/examples/research_projects/tools/calculator.py index 76779695fe..122366ddaf 100644 --- a/examples/research_projects/tools/calculator.py +++ b/examples/research_projects/tools/calculator.py @@ -1,4 +1,3 @@ -# coding=utf-8 # Copyright 2023 The HuggingFace Inc. team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -107,7 +106,7 @@ def exact_match_reward(responses, answers=None): ) # main training loop -for step in range(100): +for _step in range(100): tasks, answers = generate_data(ppo_config.batch_size) queries, responses, masks, rewards, histories = text_env.run(tasks, answers=answers) train_stats = ppo_trainer.step(queries, responses, rewards, masks) diff --git a/examples/research_projects/tools/python_interpreter.py b/examples/research_projects/tools/python_interpreter.py index b7b69806ef..2e40f91ad1 100644 --- a/examples/research_projects/tools/python_interpreter.py +++ b/examples/research_projects/tools/python_interpreter.py @@ -1,4 +1,3 @@ -# coding=utf-8 # Copyright 2023 The HuggingFace Inc. team. All rights reserved. 
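# Illustrative sketch (editor's note, not part of the patch): the `collator` rewrite in
# rl_training.py above is purely stylistic; ruff prefers a dict comprehension over feeding
# a generator of pairs to dict(). Both forms turn a list of per-sample dicts into a dict
# of lists. The toy `data` below is hypothetical, not taken from the repository.
data = [{"input_ids": [0, 1], "query": "a"}, {"input_ids": [2], "query": "b"}]
old_style = dict((key, [d[key] for d in data]) for key in data[0])
new_style = {key: [d[key] for d in data] for key in data[0]}
assert old_style == new_style == {"input_ids": [[0, 1], [2]], "query": ["a", "b"]}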
# # Licensed under the Apache License, Version 2.0 (the "License"); @@ -61,9 +60,9 @@ def exact_match_reward(responses, answers=None): if match_pattern: predicted_number = float(match_pattern[0]) if predicted_number is not None: - if np.abs((predicted_number - float(answer))) < 0.1: + if np.abs(predicted_number - float(answer)) < 0.1: reward += 1.0 - except: # noqa + except Exception: pass rewards.append(torch.tensor(reward)) return rewards diff --git a/examples/research_projects/tools/triviaqa.py b/examples/research_projects/tools/triviaqa.py index bd3bd90166..5eb5044c2b 100644 --- a/examples/research_projects/tools/triviaqa.py +++ b/examples/research_projects/tools/triviaqa.py @@ -1,4 +1,3 @@ -# coding=utf-8 # Copyright 2023 The HuggingFace Inc. team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -114,7 +113,7 @@ class ScriptArguments: def data_generator(): for i in range(len(dataset)): - yield dataset[i]["question"], [item for item in dataset[i]["answer"]["normalized_aliases"]] + yield dataset[i]["question"], list(dataset[i]["answer"]["normalized_aliases"]) gen = data_generator() @@ -123,7 +122,7 @@ def data_generator(): def generate_data(n): tasks, answers = [], [] - for i in range(n): + for _i in range(n): q, a = next(gen) tasks.append(q) answers.append(a) @@ -143,10 +142,14 @@ def exact_match_reward(responses, answers=None): return rewards +def tool_fn(x): + # limit the amount of tokens + return tool(x).split("\n")[1][:600] + + # text env tool = load_tool("vwxyzjn/pyserini-wikipedia-kilt-doc") -# limit the amount if tokens -tool_fn = lambda x: tool(x).split("\n")[1][:600] # noqa + text_env = TextEnvironment( model, tokenizer, @@ -184,8 +187,6 @@ def print_trainable_parameters(model): "answer": [", ".join(item) for item in answers], } all_rewards = ppo_trainer.accelerator.gather(torch.tensor(rewards, device=ppo_trainer.accelerator.device)) - ppo_trainer.log_stats( - train_stats, texts, [item for item in all_rewards], columns_to_log=["query", "response", "answer"] - ) + ppo_trainer.log_stats(train_stats, texts, list(all_rewards), columns_to_log=["query", "response", "answer"]) if i % 100 == 0: ppo_trainer.save_pretrained(f"models/{args.model_name}_{args.seed}_{i}_triviaqa") diff --git a/examples/research_projects/toxicity/scripts/gpt-j-6b-toxicity.py b/examples/research_projects/toxicity/scripts/gpt-j-6b-toxicity.py index 83e89b2caf..51f6d284c4 100644 --- a/examples/research_projects/toxicity/scripts/gpt-j-6b-toxicity.py +++ b/examples/research_projects/toxicity/scripts/gpt-j-6b-toxicity.py @@ -1,4 +1,3 @@ -# coding=utf-8 # Copyright 2023 The HuggingFace Inc. team. All rights reserved. 
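# Illustrative sketch (editor's note, not part of the patch): the bare `except:` removed
# from exact_match_reward above becomes `except Exception`, which still swallows ordinary
# parsing errors but no longer traps BaseException subclasses such as KeyboardInterrupt or
# SystemExit. `to_number` is a hypothetical helper, not code from the repository.
def to_number(text):
    try:
        return float(text)
    except Exception:  # a bare `except:` here would also swallow Ctrl-C
        return None

assert to_number("3.5") == 3.5 and to_number("not a number") is None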
# # Licensed under the Apache License, Version 2.0 (the "License"); @@ -146,7 +145,7 @@ def tokenize(sample): def collator(data): - return dict((key, [d[key] for d in data]) for key in data[0]) + return {key: [d[key] for d in data] for key in data[0]} # set seed before initializing value head for deterministic eval @@ -218,7 +217,7 @@ def collator(data): response_tensors.append(response.squeeze()[-gen_len:]) batch["response"] = [tokenizer.decode(r.squeeze()) for r in response_tensors] - # Compute sentiment score # noqa + # Compute sentiment score texts = batch["response"] toxicity_inputs = toxicity_tokenizer(texts, padding=True, truncation=True, return_tensors="pt").to( ppo_trainer.accelerator.device diff --git a/examples/scripts/dpo.py b/examples/scripts/dpo.py index 1a08f50818..587a5efdfb 100644 --- a/examples/scripts/dpo.py +++ b/examples/scripts/dpo.py @@ -1,4 +1,3 @@ -# coding=utf-8 # Copyright 2023 The HuggingFace Inc. team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/examples/scripts/ppo.py b/examples/scripts/ppo.py index b412e69db4..9282144e66 100644 --- a/examples/scripts/ppo.py +++ b/examples/scripts/ppo.py @@ -1,4 +1,3 @@ -# coding=utf-8 # Copyright 2023 The HuggingFace Inc. team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -95,7 +94,7 @@ def tokenize(sample): def collator(data): - return dict((key, [d[key] for d in data]) for key in data[0]) + return {key: [d[key] for d in data] for key in data[0]} # set seed before initializing value head for deterministic eval @@ -171,7 +170,7 @@ def collator(data): "max_new_tokens": 32, } -for epoch, batch in tqdm(enumerate(ppo_trainer.dataloader)): +for _epoch, batch in tqdm(enumerate(ppo_trainer.dataloader)): query_tensors = batch["input_ids"] # Get response from gpt2 diff --git a/examples/scripts/ppo_multi_adapter.py b/examples/scripts/ppo_multi_adapter.py index 2bd489dfbd..782235781b 100644 --- a/examples/scripts/ppo_multi_adapter.py +++ b/examples/scripts/ppo_multi_adapter.py @@ -1,4 +1,3 @@ -# coding=utf-8 # Copyright 2023 The HuggingFace Inc. team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -97,7 +96,7 @@ def tokenize(example): def collator(data): - return dict((key, [d[key] for d in data]) for key in data[0]) + return {key: [d[key] for d in data] for key in data[0]} config = PPOConfig( @@ -131,7 +130,7 @@ def collator(data): "max_new_tokens": 32, } -for epoch, batch in tqdm(enumerate(ppo_trainer.dataloader)): +for _epoch, batch in tqdm(enumerate(ppo_trainer.dataloader)): question_tensors = batch["input_ids"] response_tensors = ppo_trainer.generate( diff --git a/examples/scripts/reward_modeling.py b/examples/scripts/reward_modeling.py index 34af55987c..48a504cc35 100644 --- a/examples/scripts/reward_modeling.py +++ b/examples/scripts/reward_modeling.py @@ -1,4 +1,3 @@ -# coding=utf-8 # Copyright 2023 The HuggingFace Inc. team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/examples/scripts/sft.py b/examples/scripts/sft.py index 5072920cb4..61f5eedb03 100644 --- a/examples/scripts/sft.py +++ b/examples/scripts/sft.py @@ -1,4 +1,3 @@ -# coding=utf-8 # Copyright 2023 The HuggingFace Inc. team. All rights reserved. 
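# Illustrative sketch (editor's note, not part of the patch): the `epoch` -> `_epoch`
# renames in ppo.py and ppo_multi_adapter.py above leave enumerate() and the loop body
# unchanged; the leading underscore only marks the counter as intentionally unused, so the
# linter stops reporting it. The toy loop below is hypothetical.
batches = [["hello"], ["world", "!"]]
lengths = []
for _epoch, batch in enumerate(batches):  # `_epoch` is never read in the loop body
    lengths.append(len(batch))
assert lengths == [1, 2]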
# # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/pyproject.toml b/pyproject.toml index 7e6b3f84fa..301264c1d4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,16 +1,22 @@ -[tool.black] -line-length = 119 -target-version = ['py38'] - [tool.ruff] -ignore = ["E501", "E741", "W605"] -select = ["E", "F", "I", "W"] +target-version = "py37" line-length = 119 -# Ignore import violations in all `__init__.py` files. -[tool.ruff.per-file-ignores] -"__init__.py" = ["E402", "F401", "F403", "F811"] +[tool.ruff.lint] +ignore = [ + "B028", # warning without explicit stacklevel + "C408", # dict() calls (stylistic) + "C901", # function complexity + "E501", +] +extend-select = ["E", "F", "I", "W", "UP", "B", "T", "C"] + +[tool.ruff.lint.per-file-ignores] +# Allow prints in auxiliary scripts +"benchmark/**.py" = ["T201"] +"examples/**.py" = ["T201"] +"scripts/**.py" = ["T201"] -[tool.ruff.isort] +[tool.ruff.lint.isort] lines-after-imports = 2 known-first-party = ["trl"] diff --git a/scripts/log_example_reports.py b/scripts/log_example_reports.py index a7918925b9..b49a608826 100644 --- a/scripts/log_example_reports.py +++ b/scripts/log_example_reports.py @@ -31,7 +31,7 @@ def main(text_file_name, slack_channel_name=None): if os.path.isfile(text_file_name): final_results = {} - file = open(text_file_name, "r") + file = open(text_file_name) lines = file.readlines() for line in lines: result, config_name = line.split(",") diff --git a/scripts/log_reports.py b/scripts/log_reports.py index de4b27c1e5..5fd38c44cb 100644 --- a/scripts/log_reports.py +++ b/scripts/log_reports.py @@ -40,7 +40,7 @@ def main(slack_channel_name=None): for log in Path().glob("*.log"): section_num_failed = 0 i = 0 - with open(log, "r") as f: + with open(log) as f: for line in f: line = json.loads(line) i += 1 diff --git a/scripts/stale.py b/scripts/stale.py index de7b869c13..0713f7f419 100644 --- a/scripts/stale.py +++ b/scripts/stale.py @@ -35,7 +35,7 @@ def main(): open_issues = repo.get_issues(state="open") for issue in open_issues: - comments = sorted([comment for comment in issue.get_comments()], key=lambda i: i.created_at, reverse=True) + comments = sorted(issue.get_comments(), key=lambda i: i.created_at, reverse=True) last_comment = comments[0] if len(comments) > 0 else None if ( last_comment is not None diff --git a/setup.cfg b/setup.cfg index cb69438f56..0c9e0fc144 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,11 +1,2 @@ [metadata] license_file = LICENSE - -[isort] -ensure_newline_before_comments = True -force_grid_wrap = 0 -include_trailing_comma = True -line_length = 119 -lines_after_imports = 2 -multi_line_output = 3 -use_parentheses = True diff --git a/tests/test_no_peft.py b/tests/test_no_peft.py index d16b8eca8f..1c9dd9b02c 100644 --- a/tests/test_no_peft.py +++ b/tests/test_no_peft.py @@ -95,14 +95,14 @@ def test_no_peft(self): # Check that loading a model with `peft` will raise an error with pytest.raises(ModuleNotFoundError): - import peft # noqa + import peft # noqa: F401 - trl_model = AutoModelForCausalLMWithValueHead.from_pretrained(self.causal_lm_model_id) # noqa - trl_seq2seq_model = AutoModelForSeq2SeqLMWithValueHead.from_pretrained(self.seq_to_seq_model_id) # noqa + _trl_model = AutoModelForCausalLMWithValueHead.from_pretrained(self.causal_lm_model_id) + _trl_seq2seq_model = AutoModelForSeq2SeqLMWithValueHead.from_pretrained(self.seq_to_seq_model_id) def test_imports_no_peft(self): with patch.dict(sys.modules, {"peft": None}): - from trl import ( # noqa + from trl import ( # 
noqa: F401 AutoModelForCausalLMWithValueHead, AutoModelForSeq2SeqLMWithValueHead, PPOConfig, @@ -141,7 +141,7 @@ def test_ppo_trainer_no_peft(self): # (this could be any reward such as human feedback or output from another model) reward = [torch.tensor(1.0), torch.tensor(0.0)] # train model - train_stats = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward) + train_stats = ppo_trainer.step(list(query_tensor), list(response_tensor), reward) break # check gradients are not None diff --git a/tests/test_ppo_trainer.py b/tests/test_ppo_trainer.py index e2a9d2a138..a5f096feff 100644 --- a/tests/test_ppo_trainer.py +++ b/tests/test_ppo_trainer.py @@ -200,7 +200,7 @@ def test_ppo_step(self): # (this could be any reward such as human feedback or output from another model) reward = [torch.tensor(1.0), torch.tensor(0.0)] # train model - train_stats = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward) + train_stats = ppo_trainer.step(list(query_tensor), list(response_tensor), reward) break for param in ppo_trainer.model.parameters(): @@ -230,9 +230,7 @@ def test_ppo_step_with_masks(self): response_mask = [torch.ones_like(r) for r in response_tensor] # train model - train_stats = ppo_trainer.step( - [q for q in query_tensor], [r for r in response_tensor], reward, response_mask - ) + train_stats = ppo_trainer.step(list(query_tensor), list(response_tensor), reward, response_mask) break for param in ppo_trainer.model.parameters(): @@ -264,7 +262,7 @@ def test_ppo_step_with_no_ref_sgd(self): # (this could be any reward such as human feedback or output from another model) reward = [torch.tensor(1.0), torch.tensor(0.0)] # train model - train_stats = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward) + train_stats = ppo_trainer.step(list(query_tensor), list(response_tensor), reward) break for name, param in ppo_trainer.model.named_parameters(): @@ -304,8 +302,8 @@ def test_ppo_step_with_no_ref_sgd_lr_scheduler(self): # (this could be any reward such as human feedback or output from another model) reward = [torch.tensor(1.0), torch.tensor(0.0)] # train model - _ = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward) - train_stats = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward) + _ = ppo_trainer.step(list(query_tensor), list(response_tensor), reward) + train_stats = ppo_trainer.step(list(query_tensor), list(response_tensor), reward) break for name, param in ppo_trainer.model.named_parameters(): @@ -341,7 +339,7 @@ def test_ppo_step_with_no_ref(self): # (this could be any reward such as human feedback or output from another model) reward = [torch.tensor(1.0), torch.tensor(0.0)] # train model - train_stats = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward) + train_stats = ppo_trainer.step(list(query_tensor), list(response_tensor), reward) break for name, param in ppo_trainer.model.named_parameters(): @@ -391,7 +389,7 @@ def test_ppo_step_with_no_ref_custom_layers(self): # (this could be any reward such as human feedback or output from another model) reward = [torch.tensor(1.0), torch.tensor(0.0)] # train model - train_stats = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward) + train_stats = ppo_trainer.step(list(query_tensor), list(response_tensor), reward) break pattern = r".*transformer\.h\.(\d+)\..*" @@ -404,7 +402,7 @@ def test_ppo_step_with_no_ref_custom_layers(self): assert param.grad is 
None, f"Parameter {name} has a gradient" else: assert param.grad is not None, f"Parameter {name} has no gradient" - elif any([layer in name for layer in final_layers]): + elif any(layer in name for layer in final_layers): assert param.grad is not None, f"Parameter {name} has no gradient" # ref model should not be trained @@ -459,11 +457,11 @@ def test_ppo_step_rewards_shape(self): reward = [torch.tensor([[1.0]]), torch.tensor([[0.0]])] # train model - this should raise an error with pytest.raises(ValueError): - _ = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward) + _ = ppo_trainer.step(list(query_tensor), list(response_tensor), reward) reward = [torch.tensor([1.0]), torch.tensor([0.0])] # train model - this should work - _ = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward) + _ = ppo_trainer.step(list(query_tensor), list(response_tensor), reward) break # check if the gradients are computed for the model @@ -498,7 +496,7 @@ def test_ppo_step_input_shape(self): bs = ppo_trainer.config.batch_size queries, responses, _, _ = ppo_trainer._step_safety_checker( - bs, [q for q in query_tensor], [r for r in response_tensor], reward + bs, list(query_tensor), list(response_tensor), reward ) assert isinstance(queries, list), f"queries should be a list, got {type(queries)}" @@ -703,7 +701,7 @@ def test_ppo_trainer_max_grad_norm(self): # (this could be any reward such as human feedback or output from another model) reward = [torch.tensor(1.0), torch.tensor(0.0)] # train model - _ = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward) + _ = ppo_trainer.step(list(query_tensor), list(response_tensor), reward) break # check gradients @@ -892,11 +890,11 @@ def make_inputs_require_grad(module, input, output): # (this could be any reward such as human feedback or output from another model) reward = [torch.tensor(1.0), torch.tensor(0.0)] # train model by running a step twice - _ = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward) + _ = ppo_trainer.step(list(query_tensor), list(response_tensor), reward) ppo_trainer.model.train() ppo_trainer.model.gradient_checkpointing_enable() - _ = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward) + _ = ppo_trainer.step(list(query_tensor), list(response_tensor), reward) break # check gradients @@ -980,11 +978,11 @@ def make_inputs_require_grad(module, input, output): # (this could be any reward such as human feedback or output from another model) reward = [torch.tensor(1.0), torch.tensor(0.0)] # train model by running a step twice - _ = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward) + _ = ppo_trainer.step(list(query_tensor), list(response_tensor), reward) ppo_trainer.model.train() ppo_trainer.model.gradient_checkpointing_enable() - _ = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward) + _ = ppo_trainer.step(list(query_tensor), list(response_tensor), reward) break new_logits = ppo_trainer.model.compute_reward_score(dummy_inputs) @@ -1090,11 +1088,11 @@ def make_inputs_require_grad(module, input, output): # (this could be any reward such as human feedback or output from another model) reward = [torch.tensor(1.0), torch.tensor(0.0)] # train model by running a step twice - _ = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward) + _ = ppo_trainer.step(list(query_tensor), list(response_tensor), reward) ppo_trainer.model.train() 
ppo_trainer.model.gradient_checkpointing_enable() - _ = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward) + _ = ppo_trainer.step(list(query_tensor), list(response_tensor), reward) break # check gradients @@ -1160,7 +1158,7 @@ def test_grad_accumulation(self): # (this could be any reward such as human feedback or output from another model) reward = [torch.tensor(1.0), torch.tensor(1.0)] # train model by running a step twice - _ = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward) + _ = ppo_trainer.step(list(query_tensor), list(response_tensor), reward) break model_grad = gpt2_model.v_head.summary.weight @@ -1184,7 +1182,7 @@ def test_grad_accumulation(self): # (this could be any reward such as human feedback or output from another model) reward = [torch.tensor(1.0), torch.tensor(1.0)] # train model by running a step twice - _ = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward) + _ = ppo_trainer.step(list(query_tensor), list(response_tensor), reward) break model_grad_acc = gpt2_model_clone.v_head.summary.weight @@ -1222,7 +1220,7 @@ def test_push_to_hub_if_best_reward(self): # (this could be any reward such as human feedback or output from another model) reward = [torch.tensor(1.0), torch.tensor(0.0)] # train model - _ = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward) + _ = ppo_trainer.step(list(query_tensor), list(response_tensor), reward) break def test_batch_size_check(self): diff --git a/tests/test_reward_trainer.py b/tests/test_reward_trainer.py index adf98ededd..5e1ea9dfd2 100644 --- a/tests/test_reward_trainer.py +++ b/tests/test_reward_trainer.py @@ -176,7 +176,7 @@ def test_reward_trainer_peft(self): # check gradients are not None for n, param in trainer.model.named_parameters(): - if any([t in n for t in trainable_params_name]): + if any(t in n for t in trainable_params_name): previous_trainable_params[n] = param.clone() else: previous_non_trainable_params[n] = param.clone() diff --git a/trl/core.py b/trl/core.py index 1a0e8761a6..9d92ee18f0 100644 --- a/trl/core.py +++ b/trl/core.py @@ -30,7 +30,7 @@ try: from collections.abc import Mapping except ImportError: - from collections import Mapping + from collections.abc import Mapping WANDB_PADDING = -1 @@ -80,7 +80,7 @@ def stack_dicts(stats_dicts: List[Dict]) -> Dict: def add_suffix(input_dict: Dict, suffix: str) -> Dict: """Add suffix to dict keys.""" - return dict((k + suffix, v) for k, v in input_dict.items()) + return {k + suffix: v for k, v in input_dict.items()} def pad_to_size(tensor: torch.Tensor, size: int, dim: int = 1, padding: int = 50256) -> torch.Tensor: @@ -194,7 +194,7 @@ def respond_to_batch( ) -> torch.LongTensor: """Sample text from language model.""" input_ids = queries - for i in range(txt_len): + for _i in range(txt_len): # Get Logits outputs = model(input_ids) next_token_logits = outputs[0][:, -1, :] @@ -236,7 +236,7 @@ def __call__(self) -> int: return np.random.choice(self.values) -class PPODecorators(object): +class PPODecorators: optimize_device_cache = False @classmethod diff --git a/trl/environment/base_environment.py b/trl/environment/base_environment.py index 58b61fd17f..7037166d76 100644 --- a/trl/environment/base_environment.py +++ b/trl/environment/base_environment.py @@ -46,7 +46,7 @@ def __call__(self, input_ids, scores, **kwargs): done = [] for i, decoded_generation in enumerate(decoded_generations): - sequence_complete = any([stop_string in decoded_generation for 
stop_string in self.stop_strings]) + sequence_complete = any(stop_string in decoded_generation for stop_string in self.stop_strings) done.append(sequence_complete) if not sequence_complete: self.generated_tokens[i] += 1 @@ -243,7 +243,7 @@ def __init__( if isinstance(tools, dict): self.tools = tools else: - self.tools = dict([(tool.__class__.__name__, tool) for tool in tools]) + self.tools = {tool.__class__.__name__: tool for tool in tools} self.reward_fn = reward_fn self.max_length = max_length self.request_token = "" @@ -278,7 +278,7 @@ def run(self, queries, **rewards_kwargs): histories = [TextHistory(q, qt, system=True) for q, qt in zip(queries, queries_tokens)] - while any([not history.completed for history in histories]) and turns < self.max_turns: + while any(not history.completed for history in histories) and turns < self.max_turns: histories = self.generate(histories) histories = self.tasks_end_check(histories) # TODO: make this parallel rather than for-loop diff --git a/trl/extras/best_of_n_sampler.py b/trl/extras/best_of_n_sampler.py index 1441eecd41..b400b14b18 100644 --- a/trl/extras/best_of_n_sampler.py +++ b/trl/extras/best_of_n_sampler.py @@ -7,7 +7,7 @@ from ..models import SUPPORTED_ARCHITECTURES, PreTrainedModelWrapper -class BestOfNSampler(object): +class BestOfNSampler: def __init__( self, model: PreTrainedModelWrapper, diff --git a/trl/models/modeling_base.py b/trl/models/modeling_base.py index f6d4e86bba..f7894ddedb 100644 --- a/trl/models/modeling_base.py +++ b/trl/models/modeling_base.py @@ -71,6 +71,7 @@ class PreTrainedModelWrapper(nn.Module): supported_args: (`list`) The list of arguments that are supported by the wrapper class. """ + transformers_parent_class = None supported_args = None supported_modules = ("v_head",) @@ -378,12 +379,12 @@ def _get_checkpoint_from_hub( ) # load json if is_resuming_training: - with open(index_file_name, "r") as f: + with open(index_file_name) as f: index = json.load(f) # check filename with `v_head` or any known extra module: files_to_download = set() for k, v in index["weight_map"].items(): - if any([module in k for module in cls.supported_modules]): + if any(module in k for module in cls.supported_modules): files_to_download.add(v) is_sharded = True @@ -460,7 +461,7 @@ def add_and_load_reward_modeling_adapter( "adapter_model.bin", token=token, ) - except: # noqa + except Exception: filename = os.path.join(adapter_model_id, "adapter_model.safetensors") safe_loading = True if not os.path.exists(filename): @@ -470,10 +471,11 @@ def add_and_load_reward_modeling_adapter( "adapter_model.safetensors", token=token, ) - except: # noqa + except Exception as exc: raise ValueError( - "Could not find adapter model in the Hub, make sure you have the correct adapter model id." - ) + "Could not find adapter model in the Hub, " + "make sure you have the correct adapter model id." 
+ ) from exc else: local_filename = filename else: @@ -485,7 +487,7 @@ def add_and_load_reward_modeling_adapter( adapter_state_dict = loading_func(local_filename, **load_kwargs) for score_name_candidate in cls.supported_rm_modules: - if any([score_name_candidate in name for name in adapter_state_dict.keys()]): + if any(score_name_candidate in name for name in adapter_state_dict.keys()): score_name = score_name_candidate # we have found the correct head name and can break break @@ -498,7 +500,7 @@ def add_and_load_reward_modeling_adapter( score_dict[key_name] = param.to(cls._get_current_device()) num_labels, hidden_dim = score_dict["weight"].shape - has_bias = any(["bias" in name for name in adapter_state_dict.keys()]) + has_bias = any("bias" in name for name in adapter_state_dict.keys()) score = nn.Linear(hidden_dim, num_labels, bias=has_bias).to( device=cls._get_current_device(), @@ -636,7 +638,7 @@ def create_reference_model( else: for pattern_candidate in LAYER_PATTERNS: pattern_candidate = pattern_candidate.format(layer=num_shared_layers) - if any([pattern_candidate in name for name in parameter_names]): + if any(pattern_candidate in name for name in parameter_names): pattern = pattern_candidate break @@ -648,7 +650,7 @@ def create_reference_model( unshared_param_list = [] shared_parameter = True - for name, param in model.named_parameters(): + for name, _param in model.named_parameters(): if pattern in name: shared_parameter = False if shared_parameter: @@ -661,8 +663,7 @@ def create_reference_model( param = model.get_parameter(param_name) param.requires_grad = False - ref_param = ref_model.get_parameter(param_name) # noqa - ref_param = param # noqa + _ref_param = ref_model.get_parameter(param_name) # for all other parameters just make sure they don't use gradients for param_name in unshared_param_list: diff --git a/trl/models/modeling_sd_base.py b/trl/models/modeling_sd_base.py index 954e71fff1..2cfb842408 100644 --- a/trl/models/modeling_sd_base.py +++ b/trl/models/modeling_sd_base.py @@ -34,7 +34,7 @@ @dataclass -class DDPOPipelineOutput(object): +class DDPOPipelineOutput: """ Output class for the diffusers pipeline to be finetuned with the DDPO trainer @@ -54,7 +54,7 @@ class DDPOPipelineOutput(object): @dataclass -class DDPOSchedulerOutput(object): +class DDPOSchedulerOutput: """ Output class for the diffusers scheduler to be finetuned with the DDPO trainer @@ -69,7 +69,7 @@ class DDPOSchedulerOutput(object): log_probs: torch.Tensor -class DDPOStableDiffusionPipeline(object): +class DDPOStableDiffusionPipeline: """ Main class for the diffusers pipeline to be finetuned with the DDPO trainer """ diff --git a/trl/models/modeling_value_head.py b/trl/models/modeling_value_head.py index 2771cc6ce2..457546ab51 100644 --- a/trl/models/modeling_value_head.py +++ b/trl/models/modeling_value_head.py @@ -85,6 +85,7 @@ class AutoModelForCausalLMWithValueHead(PreTrainedModelWrapper): - **"normal"** -- Initializes the weights of the `ValueHead` with a normal distribution. 
""" + transformers_parent_class = AutoModelForCausalLM lm_head_namings = ["lm_head", "embed_out"] supported_args = ( @@ -218,7 +219,7 @@ def state_dict(self, *args, **kwargs): return pretrained_model_state_dict def push_to_hub(self, *args, **kwargs): - setattr(self.pretrained_model, "v_head", self.v_head) + self.pretrained_model.v_head = self.v_head return self.pretrained_model.push_to_hub(*args, **kwargs) @@ -276,6 +277,7 @@ class AutoModelForSeq2SeqLMWithValueHead(PreTrainedModelWrapper): kwargs: Additional keyword arguments passed along to the `ValueHead` class. """ + transformers_parent_class = AutoModelForSeq2SeqLM lm_head_namings = ["lm_head", "embed_out", "output_projection"] supported_args = ( @@ -298,7 +300,7 @@ def __init__(self, pretrained_model, **kwargs): def _has_lm_head(self): # check module names of all modules inside `pretrained_model` to find the language model head - for name, module in self.pretrained_model.named_modules(): + for name, _module in self.pretrained_model.named_modules(): if any(attribute in name for attribute in self.lm_head_namings): return True return False @@ -374,7 +376,7 @@ def state_dict(self, *args, **kwargs): return pretrained_model_state_dict def push_to_hub(self, *args, **kwargs): - setattr(self.pretrained_model, "v_head", self.v_head) + self.pretrained_model.v_head = self.v_head return self.pretrained_model.push_to_hub(*args, **kwargs) diff --git a/trl/trainer/ddpo_config.py b/trl/trainer/ddpo_config.py index 3108613814..b73bd58d05 100644 --- a/trl/trainer/ddpo_config.py +++ b/trl/trainer/ddpo_config.py @@ -107,7 +107,7 @@ def to_dict(self): def __post_init__(self): if self.log_with not in ["wandb", "tensorboard"]: warnings.warn( - ("Accelerator tracking only supports image logging if `log_with` is set to 'wandb' or 'tensorboard'.") + "Accelerator tracking only supports image logging if `log_with` is set to 'wandb' or 'tensorboard'." ) if self.log_with == "wandb" and not is_torchvision_available(): diff --git a/trl/trainer/ddpo_trainer.py b/trl/trainer/ddpo_trainer.py index 9fab11120f..df219da707 100644 --- a/trl/trainer/ddpo_trainer.py +++ b/trl/trainer/ddpo_trainer.py @@ -523,7 +523,7 @@ def _train_batched_samples(self, inner_epoch, epoch, global_step, batched_sample global_step (int): The updated global step """ info = defaultdict(list) - for i, sample in enumerate(batched_samples): + for _i, sample in enumerate(batched_samples): if self.config.train_cfg: # concat negative prompts to sample prompts to avoid two forward passes embeds = torch.cat([sample["negative_prompt_embeds"], sample["prompt_embeds"]]) @@ -613,7 +613,7 @@ def create_model_card(self, path: str, model_name: Optional[str] = "TRL DDPO Mod try: user = whoami()["name"] # handle the offline case - except: # noqa + except Exception: warnings.warn("Cannot retrieve user information assuming you are running in offline mode.") return diff --git a/trl/trainer/ppo_trainer.py b/trl/trainer/ppo_trainer.py index 537f77c8d9..df6c3090e9 100644 --- a/trl/trainer/ppo_trainer.py +++ b/trl/trainer/ppo_trainer.py @@ -1315,7 +1315,7 @@ def log_stats( stats: dict, batch: dict, rewards: List[torch.FloatTensor], - columns_to_log: List[str] = ["query", "response"], + columns_to_log: typing.Iterable[str] = ("query", "response"), ): """ A function that logs all the training stats. Call it at the end of each epoch. 
@@ -1337,7 +1337,7 @@ def log_stats( if self.config.log_with == "wandb": import wandb - if any([column_to_log not in batch.keys() for column_to_log in columns_to_log]): + if any(column_to_log not in batch.keys() for column_to_log in columns_to_log): raise ValueError(f"Columns to log {columns_to_log} are not present in the batch {batch.keys()}.") batch_list = [batch[column_to_log] for column_to_log in columns_to_log] @@ -1393,7 +1393,7 @@ def create_model_card(self, path: str, model_name: Optional[str] = "TRL Model") try: user = whoami()["name"] # handle the offline case - except: # noqa + except Exception: warnings.warn("Cannot retrieve user information assuming you are running in offline mode.") return @@ -1415,7 +1415,7 @@ def _show_tokens(self, tokens, masks): text = Text() - for i, (token, mask) in enumerate(zip(tokens, masks)): + for _i, (token, mask) in enumerate(zip(tokens, masks)): if mask == 1: text.append(self.tokenizer.decode(token.item()), style="black on deep_sky_blue1") text.append(" ") diff --git a/trl/trainer/sft_trainer.py b/trl/trainer/sft_trainer.py index 9f103e31c0..46f2ad35ce 100644 --- a/trl/trainer/sft_trainer.py +++ b/trl/trainer/sft_trainer.py @@ -117,6 +117,7 @@ class SFTTrainer(Trainer): dataset_kwargs: (`Optional[Dict]`, *optional*): Dict of Optional kwargs to pass when creating packed or non-packed datasets """ + _tag_names = ["trl", "sft"] def __init__( @@ -480,17 +481,17 @@ def _prepare_packed_dataloader( ) def data_generator(constant_length_iterator): - for i in constant_length_iterator: - yield i + yield from constant_length_iterator try: packed_dataset = Dataset.from_generator( data_generator, gen_kwargs={"constant_length_iterator": constant_length_iterator} ) - except (DatasetGenerationError, SchemaInferenceError): + except (DatasetGenerationError, SchemaInferenceError) as exc: raise ValueError( - "Error occurred while packing the dataset. Make sure that your dataset has enough samples to at least yield one packed sequence." - ) + "Error occurred while packing the dataset. " + "Make sure that your dataset has enough samples to at least yield one packed sequence." + ) from exc return packed_dataset else: raise ValueError(