diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 545815fe67..2bd74e5297 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,37 +1,10 @@ repos: - - repo: https://github.com/PyCQA/isort - rev: 5.12.0 + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.2.0 hooks: - - id: isort - args: - - --profile=black - - --skip-glob=wandb/**/* - - --thirdparty=wandb - - repo: https://github.com/myint/autoflake - rev: v1.4 - hooks: - - id: autoflake - args: - - -r - - --exclude=wandb,__init__.py - - --in-place - - --remove-unused-variables - - --remove-all-unused-imports - - repo: https://github.com/python/black - rev: 22.3.0 - hooks: - - id: black - args: - - --line-length=119 - - --target-version=py38 - - --exclude=wandb - - repo: https://github.com/pycqa/flake8 - rev: 6.0.0 - hooks: - - id: flake8 - args: - - --ignore=E203,E501,W503,E128 - - --max-line-length=119 + - id: ruff + args: [ --fix ] + - id: ruff-format # - repo: https://github.com/codespell-project/codespell # rev: v2.1.0 diff --git a/examples/hello_world.py b/examples/hello_world.py index 4ba1c9b890..b2beb20293 100644 --- a/examples/hello_world.py +++ b/examples/hello_world.py @@ -29,7 +29,7 @@ "pad_token_id": tokenizer.eos_token_id, "max_new_tokens": 20, } -response_tensor = ppo_trainer.generate([item for item in query_tensor], return_prompt=False, **generation_kwargs) +response_tensor = ppo_trainer.generate(list(query_tensor), return_prompt=False, **generation_kwargs) response_txt = tokenizer.decode(response_tensor[0]) # 5. define a reward for response diff --git a/examples/research_projects/stack_llama/scripts/rl_training.py b/examples/research_projects/stack_llama/scripts/rl_training.py index 225d6b810a..c8502c5e89 100644 --- a/examples/research_projects/stack_llama/scripts/rl_training.py +++ b/examples/research_projects/stack_llama/scripts/rl_training.py @@ -1,4 +1,3 @@ -# coding=utf-8 # Copyright 2022 The HuggingFace Inc. team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -163,7 +162,7 @@ def preprocess_function(examples): def collator(data): - return dict((key, [d[key] for d in data]) for key in data[0]) + return {key: [d[key] for d in data] for key in data[0]} # set seed before initializing value head for deterministic eval diff --git a/examples/research_projects/tools/calculator.py b/examples/research_projects/tools/calculator.py index 76779695fe..122366ddaf 100644 --- a/examples/research_projects/tools/calculator.py +++ b/examples/research_projects/tools/calculator.py @@ -1,4 +1,3 @@ -# coding=utf-8 # Copyright 2023 The HuggingFace Inc. team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -107,7 +106,7 @@ def exact_match_reward(responses, answers=None): ) # main training loop -for step in range(100): +for _step in range(100): tasks, answers = generate_data(ppo_config.batch_size) queries, responses, masks, rewards, histories = text_env.run(tasks, answers=answers) train_stats = ppo_trainer.step(queries, responses, rewards, masks) diff --git a/examples/research_projects/tools/python_interpreter.py b/examples/research_projects/tools/python_interpreter.py index b7b69806ef..2e40f91ad1 100644 --- a/examples/research_projects/tools/python_interpreter.py +++ b/examples/research_projects/tools/python_interpreter.py @@ -1,4 +1,3 @@ -# coding=utf-8 # Copyright 2023 The HuggingFace Inc. team. All rights reserved. 
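# Illustrative sketch (editor's note, not part of the patch): the `collator` rewrite in
# rl_training.py above is purely stylistic; ruff prefers a dict comprehension over feeding
# a generator of pairs to dict(). Both forms turn a list of per-sample dicts into a dict
# of lists. The toy `data` below is hypothetical, not taken from the repository.
data = [{"input_ids": [0, 1], "query": "a"}, {"input_ids": [2], "query": "b"}]
old_style = dict((key, [d[key] for d in data]) for key in data[0])
new_style = {key: [d[key] for d in data] for key in data[0]}
assert old_style == new_style == {"input_ids": [[0, 1], [2]], "query": ["a", "b"]}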
# # Licensed under the Apache License, Version 2.0 (the "License"); @@ -61,9 +60,9 @@ def exact_match_reward(responses, answers=None): if match_pattern: predicted_number = float(match_pattern[0]) if predicted_number is not None: - if np.abs((predicted_number - float(answer))) < 0.1: + if np.abs(predicted_number - float(answer)) < 0.1: reward += 1.0 - except: # noqa + except Exception: pass rewards.append(torch.tensor(reward)) return rewards diff --git a/examples/research_projects/tools/triviaqa.py b/examples/research_projects/tools/triviaqa.py index bd3bd90166..5eb5044c2b 100644 --- a/examples/research_projects/tools/triviaqa.py +++ b/examples/research_projects/tools/triviaqa.py @@ -1,4 +1,3 @@ -# coding=utf-8 # Copyright 2023 The HuggingFace Inc. team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -114,7 +113,7 @@ class ScriptArguments: def data_generator(): for i in range(len(dataset)): - yield dataset[i]["question"], [item for item in dataset[i]["answer"]["normalized_aliases"]] + yield dataset[i]["question"], list(dataset[i]["answer"]["normalized_aliases"]) gen = data_generator() @@ -123,7 +122,7 @@ def data_generator(): def generate_data(n): tasks, answers = [], [] - for i in range(n): + for _i in range(n): q, a = next(gen) tasks.append(q) answers.append(a) @@ -143,10 +142,14 @@ def exact_match_reward(responses, answers=None): return rewards +def tool_fn(x): + # limit the amount of tokens + return tool(x).split("\n")[1][:600] + + # text env tool = load_tool("vwxyzjn/pyserini-wikipedia-kilt-doc") -# limit the amount if tokens -tool_fn = lambda x: tool(x).split("\n")[1][:600] # noqa + text_env = TextEnvironment( model, tokenizer, @@ -184,8 +187,6 @@ def print_trainable_parameters(model): "answer": [", ".join(item) for item in answers], } all_rewards = ppo_trainer.accelerator.gather(torch.tensor(rewards, device=ppo_trainer.accelerator.device)) - ppo_trainer.log_stats( - train_stats, texts, [item for item in all_rewards], columns_to_log=["query", "response", "answer"] - ) + ppo_trainer.log_stats(train_stats, texts, list(all_rewards), columns_to_log=["query", "response", "answer"]) if i % 100 == 0: ppo_trainer.save_pretrained(f"models/{args.model_name}_{args.seed}_{i}_triviaqa") diff --git a/examples/research_projects/toxicity/scripts/gpt-j-6b-toxicity.py b/examples/research_projects/toxicity/scripts/gpt-j-6b-toxicity.py index 83e89b2caf..51f6d284c4 100644 --- a/examples/research_projects/toxicity/scripts/gpt-j-6b-toxicity.py +++ b/examples/research_projects/toxicity/scripts/gpt-j-6b-toxicity.py @@ -1,4 +1,3 @@ -# coding=utf-8 # Copyright 2023 The HuggingFace Inc. team. All rights reserved. 
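# Illustrative sketch (editor's note, not part of the patch): the bare `except:` removed
# from exact_match_reward above becomes `except Exception`, which still swallows ordinary
# parsing errors but no longer traps BaseException subclasses such as KeyboardInterrupt or
# SystemExit. `to_number` is a hypothetical helper, not code from the repository.
def to_number(text):
    try:
        return float(text)
    except Exception:  # a bare `except:` here would also swallow Ctrl-C
        return None

assert to_number("3.5") == 3.5 and to_number("not a number") is None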
# # Licensed under the Apache License, Version 2.0 (the "License"); @@ -146,7 +145,7 @@ def tokenize(sample): def collator(data): - return dict((key, [d[key] for d in data]) for key in data[0]) + return {key: [d[key] for d in data] for key in data[0]} # set seed before initializing value head for deterministic eval @@ -218,7 +217,7 @@ def collator(data): response_tensors.append(response.squeeze()[-gen_len:]) batch["response"] = [tokenizer.decode(r.squeeze()) for r in response_tensors] - # Compute sentiment score # noqa + # Compute sentiment score texts = batch["response"] toxicity_inputs = toxicity_tokenizer(texts, padding=True, truncation=True, return_tensors="pt").to( ppo_trainer.accelerator.device diff --git a/examples/scripts/dpo.py b/examples/scripts/dpo.py index 1a08f50818..587a5efdfb 100644 --- a/examples/scripts/dpo.py +++ b/examples/scripts/dpo.py @@ -1,4 +1,3 @@ -# coding=utf-8 # Copyright 2023 The HuggingFace Inc. team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/examples/scripts/ppo.py b/examples/scripts/ppo.py index b412e69db4..9282144e66 100644 --- a/examples/scripts/ppo.py +++ b/examples/scripts/ppo.py @@ -1,4 +1,3 @@ -# coding=utf-8 # Copyright 2023 The HuggingFace Inc. team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -95,7 +94,7 @@ def tokenize(sample): def collator(data): - return dict((key, [d[key] for d in data]) for key in data[0]) + return {key: [d[key] for d in data] for key in data[0]} # set seed before initializing value head for deterministic eval @@ -171,7 +170,7 @@ def collator(data): "max_new_tokens": 32, } -for epoch, batch in tqdm(enumerate(ppo_trainer.dataloader)): +for _epoch, batch in tqdm(enumerate(ppo_trainer.dataloader)): query_tensors = batch["input_ids"] # Get response from gpt2 diff --git a/examples/scripts/ppo_multi_adapter.py b/examples/scripts/ppo_multi_adapter.py index 2bd489dfbd..782235781b 100644 --- a/examples/scripts/ppo_multi_adapter.py +++ b/examples/scripts/ppo_multi_adapter.py @@ -1,4 +1,3 @@ -# coding=utf-8 # Copyright 2023 The HuggingFace Inc. team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -97,7 +96,7 @@ def tokenize(example): def collator(data): - return dict((key, [d[key] for d in data]) for key in data[0]) + return {key: [d[key] for d in data] for key in data[0]} config = PPOConfig( @@ -131,7 +130,7 @@ def collator(data): "max_new_tokens": 32, } -for epoch, batch in tqdm(enumerate(ppo_trainer.dataloader)): +for _epoch, batch in tqdm(enumerate(ppo_trainer.dataloader)): question_tensors = batch["input_ids"] response_tensors = ppo_trainer.generate( diff --git a/examples/scripts/reward_modeling.py b/examples/scripts/reward_modeling.py index 34af55987c..48a504cc35 100644 --- a/examples/scripts/reward_modeling.py +++ b/examples/scripts/reward_modeling.py @@ -1,4 +1,3 @@ -# coding=utf-8 # Copyright 2023 The HuggingFace Inc. team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/examples/scripts/sft.py b/examples/scripts/sft.py index 5072920cb4..61f5eedb03 100644 --- a/examples/scripts/sft.py +++ b/examples/scripts/sft.py @@ -1,4 +1,3 @@ -# coding=utf-8 # Copyright 2023 The HuggingFace Inc. team. All rights reserved. 
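# Illustrative sketch (editor's note, not part of the patch): the `epoch` -> `_epoch`
# renames in ppo.py and ppo_multi_adapter.py above leave enumerate() and the loop body
# unchanged; the leading underscore only marks the counter as intentionally unused, so the
# linter stops reporting it. The toy loop below is hypothetical.
batches = [["hello"], ["world", "!"]]
lengths = []
for _epoch, batch in enumerate(batches):  # `_epoch` is never read in the loop body
    lengths.append(len(batch))
assert lengths == [1, 2]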
# # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/pyproject.toml b/pyproject.toml index 7e6b3f84fa..301264c1d4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,16 +1,22 @@ -[tool.black] -line-length = 119 -target-version = ['py38'] - [tool.ruff] -ignore = ["E501", "E741", "W605"] -select = ["E", "F", "I", "W"] +target-version = "py37" line-length = 119 -# Ignore import violations in all `__init__.py` files. -[tool.ruff.per-file-ignores] -"__init__.py" = ["E402", "F401", "F403", "F811"] +[tool.ruff.lint] +ignore = [ + "B028", # warning without explicit stacklevel + "C408", # dict() calls (stylistic) + "C901", # function complexity + "E501", +] +extend-select = ["E", "F", "I", "W", "UP", "B", "T", "C"] + +[tool.ruff.lint.per-file-ignores] +# Allow prints in auxiliary scripts +"benchmark/**.py" = ["T201"] +"examples/**.py" = ["T201"] +"scripts/**.py" = ["T201"] -[tool.ruff.isort] +[tool.ruff.lint.isort] lines-after-imports = 2 known-first-party = ["trl"] diff --git a/scripts/log_example_reports.py b/scripts/log_example_reports.py index a7918925b9..b49a608826 100644 --- a/scripts/log_example_reports.py +++ b/scripts/log_example_reports.py @@ -31,7 +31,7 @@ def main(text_file_name, slack_channel_name=None): if os.path.isfile(text_file_name): final_results = {} - file = open(text_file_name, "r") + file = open(text_file_name) lines = file.readlines() for line in lines: result, config_name = line.split(",") diff --git a/scripts/log_reports.py b/scripts/log_reports.py index de4b27c1e5..5fd38c44cb 100644 --- a/scripts/log_reports.py +++ b/scripts/log_reports.py @@ -40,7 +40,7 @@ def main(slack_channel_name=None): for log in Path().glob("*.log"): section_num_failed = 0 i = 0 - with open(log, "r") as f: + with open(log) as f: for line in f: line = json.loads(line) i += 1 diff --git a/scripts/stale.py b/scripts/stale.py index de7b869c13..0713f7f419 100644 --- a/scripts/stale.py +++ b/scripts/stale.py @@ -35,7 +35,7 @@ def main(): open_issues = repo.get_issues(state="open") for issue in open_issues: - comments = sorted([comment for comment in issue.get_comments()], key=lambda i: i.created_at, reverse=True) + comments = sorted(issue.get_comments(), key=lambda i: i.created_at, reverse=True) last_comment = comments[0] if len(comments) > 0 else None if ( last_comment is not None diff --git a/setup.cfg b/setup.cfg index cb69438f56..0c9e0fc144 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,11 +1,2 @@ [metadata] license_file = LICENSE - -[isort] -ensure_newline_before_comments = True -force_grid_wrap = 0 -include_trailing_comma = True -line_length = 119 -lines_after_imports = 2 -multi_line_output = 3 -use_parentheses = True diff --git a/tests/test_no_peft.py b/tests/test_no_peft.py index d16b8eca8f..1c9dd9b02c 100644 --- a/tests/test_no_peft.py +++ b/tests/test_no_peft.py @@ -95,14 +95,14 @@ def test_no_peft(self): # Check that loading a model with `peft` will raise an error with pytest.raises(ModuleNotFoundError): - import peft # noqa + import peft # noqa: F401 - trl_model = AutoModelForCausalLMWithValueHead.from_pretrained(self.causal_lm_model_id) # noqa - trl_seq2seq_model = AutoModelForSeq2SeqLMWithValueHead.from_pretrained(self.seq_to_seq_model_id) # noqa + _trl_model = AutoModelForCausalLMWithValueHead.from_pretrained(self.causal_lm_model_id) + _trl_seq2seq_model = AutoModelForSeq2SeqLMWithValueHead.from_pretrained(self.seq_to_seq_model_id) def test_imports_no_peft(self): with patch.dict(sys.modules, {"peft": None}): - from trl import ( # noqa + from trl import ( # 
noqa: F401 AutoModelForCausalLMWithValueHead, AutoModelForSeq2SeqLMWithValueHead, PPOConfig, @@ -141,7 +141,7 @@ def test_ppo_trainer_no_peft(self): # (this could be any reward such as human feedback or output from another model) reward = [torch.tensor(1.0), torch.tensor(0.0)] # train model - train_stats = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward) + train_stats = ppo_trainer.step(list(query_tensor), list(response_tensor), reward) break # check gradients are not None diff --git a/tests/test_ppo_trainer.py b/tests/test_ppo_trainer.py index e2a9d2a138..a5f096feff 100644 --- a/tests/test_ppo_trainer.py +++ b/tests/test_ppo_trainer.py @@ -200,7 +200,7 @@ def test_ppo_step(self): # (this could be any reward such as human feedback or output from another model) reward = [torch.tensor(1.0), torch.tensor(0.0)] # train model - train_stats = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward) + train_stats = ppo_trainer.step(list(query_tensor), list(response_tensor), reward) break for param in ppo_trainer.model.parameters(): @@ -230,9 +230,7 @@ def test_ppo_step_with_masks(self): response_mask = [torch.ones_like(r) for r in response_tensor] # train model - train_stats = ppo_trainer.step( - [q for q in query_tensor], [r for r in response_tensor], reward, response_mask - ) + train_stats = ppo_trainer.step(list(query_tensor), list(response_tensor), reward, response_mask) break for param in ppo_trainer.model.parameters(): @@ -264,7 +262,7 @@ def test_ppo_step_with_no_ref_sgd(self): # (this could be any reward such as human feedback or output from another model) reward = [torch.tensor(1.0), torch.tensor(0.0)] # train model - train_stats = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward) + train_stats = ppo_trainer.step(list(query_tensor), list(response_tensor), reward) break for name, param in ppo_trainer.model.named_parameters(): @@ -304,8 +302,8 @@ def test_ppo_step_with_no_ref_sgd_lr_scheduler(self): # (this could be any reward such as human feedback or output from another model) reward = [torch.tensor(1.0), torch.tensor(0.0)] # train model - _ = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward) - train_stats = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward) + _ = ppo_trainer.step(list(query_tensor), list(response_tensor), reward) + train_stats = ppo_trainer.step(list(query_tensor), list(response_tensor), reward) break for name, param in ppo_trainer.model.named_parameters(): @@ -341,7 +339,7 @@ def test_ppo_step_with_no_ref(self): # (this could be any reward such as human feedback or output from another model) reward = [torch.tensor(1.0), torch.tensor(0.0)] # train model - train_stats = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward) + train_stats = ppo_trainer.step(list(query_tensor), list(response_tensor), reward) break for name, param in ppo_trainer.model.named_parameters(): @@ -391,7 +389,7 @@ def test_ppo_step_with_no_ref_custom_layers(self): # (this could be any reward such as human feedback or output from another model) reward = [torch.tensor(1.0), torch.tensor(0.0)] # train model - train_stats = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward) + train_stats = ppo_trainer.step(list(query_tensor), list(response_tensor), reward) break pattern = r".*transformer\.h\.(\d+)\..*" @@ -404,7 +402,7 @@ def test_ppo_step_with_no_ref_custom_layers(self): assert param.grad is 
None, f"Parameter {name} has a gradient" else: assert param.grad is not None, f"Parameter {name} has no gradient" - elif any([layer in name for layer in final_layers]): + elif any(layer in name for layer in final_layers): assert param.grad is not None, f"Parameter {name} has no gradient" # ref model should not be trained @@ -459,11 +457,11 @@ def test_ppo_step_rewards_shape(self): reward = [torch.tensor([[1.0]]), torch.tensor([[0.0]])] # train model - this should raise an error with pytest.raises(ValueError): - _ = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward) + _ = ppo_trainer.step(list(query_tensor), list(response_tensor), reward) reward = [torch.tensor([1.0]), torch.tensor([0.0])] # train model - this should work - _ = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward) + _ = ppo_trainer.step(list(query_tensor), list(response_tensor), reward) break # check if the gradients are computed for the model @@ -498,7 +496,7 @@ def test_ppo_step_input_shape(self): bs = ppo_trainer.config.batch_size queries, responses, _, _ = ppo_trainer._step_safety_checker( - bs, [q for q in query_tensor], [r for r in response_tensor], reward + bs, list(query_tensor), list(response_tensor), reward ) assert isinstance(queries, list), f"queries should be a list, got {type(queries)}" @@ -703,7 +701,7 @@ def test_ppo_trainer_max_grad_norm(self): # (this could be any reward such as human feedback or output from another model) reward = [torch.tensor(1.0), torch.tensor(0.0)] # train model - _ = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward) + _ = ppo_trainer.step(list(query_tensor), list(response_tensor), reward) break # check gradients @@ -892,11 +890,11 @@ def make_inputs_require_grad(module, input, output): # (this could be any reward such as human feedback or output from another model) reward = [torch.tensor(1.0), torch.tensor(0.0)] # train model by running a step twice - _ = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward) + _ = ppo_trainer.step(list(query_tensor), list(response_tensor), reward) ppo_trainer.model.train() ppo_trainer.model.gradient_checkpointing_enable() - _ = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward) + _ = ppo_trainer.step(list(query_tensor), list(response_tensor), reward) break # check gradients @@ -980,11 +978,11 @@ def make_inputs_require_grad(module, input, output): # (this could be any reward such as human feedback or output from another model) reward = [torch.tensor(1.0), torch.tensor(0.0)] # train model by running a step twice - _ = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward) + _ = ppo_trainer.step(list(query_tensor), list(response_tensor), reward) ppo_trainer.model.train() ppo_trainer.model.gradient_checkpointing_enable() - _ = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward) + _ = ppo_trainer.step(list(query_tensor), list(response_tensor), reward) break new_logits = ppo_trainer.model.compute_reward_score(dummy_inputs) @@ -1090,11 +1088,11 @@ def make_inputs_require_grad(module, input, output): # (this could be any reward such as human feedback or output from another model) reward = [torch.tensor(1.0), torch.tensor(0.0)] # train model by running a step twice - _ = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward) + _ = ppo_trainer.step(list(query_tensor), list(response_tensor), reward) ppo_trainer.model.train() 
ppo_trainer.model.gradient_checkpointing_enable() - _ = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward) + _ = ppo_trainer.step(list(query_tensor), list(response_tensor), reward) break # check gradients @@ -1160,7 +1158,7 @@ def test_grad_accumulation(self): # (this could be any reward such as human feedback or output from another model) reward = [torch.tensor(1.0), torch.tensor(1.0)] # train model by running a step twice - _ = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward) + _ = ppo_trainer.step(list(query_tensor), list(response_tensor), reward) break model_grad = gpt2_model.v_head.summary.weight @@ -1184,7 +1182,7 @@ def test_grad_accumulation(self): # (this could be any reward such as human feedback or output from another model) reward = [torch.tensor(1.0), torch.tensor(1.0)] # train model by running a step twice - _ = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward) + _ = ppo_trainer.step(list(query_tensor), list(response_tensor), reward) break model_grad_acc = gpt2_model_clone.v_head.summary.weight @@ -1222,7 +1220,7 @@ def test_push_to_hub_if_best_reward(self): # (this could be any reward such as human feedback or output from another model) reward = [torch.tensor(1.0), torch.tensor(0.0)] # train model - _ = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward) + _ = ppo_trainer.step(list(query_tensor), list(response_tensor), reward) break def test_batch_size_check(self): diff --git a/tests/test_reward_trainer.py b/tests/test_reward_trainer.py index adf98ededd..5e1ea9dfd2 100644 --- a/tests/test_reward_trainer.py +++ b/tests/test_reward_trainer.py @@ -176,7 +176,7 @@ def test_reward_trainer_peft(self): # check gradients are not None for n, param in trainer.model.named_parameters(): - if any([t in n for t in trainable_params_name]): + if any(t in n for t in trainable_params_name): previous_trainable_params[n] = param.clone() else: previous_non_trainable_params[n] = param.clone() diff --git a/trl/core.py b/trl/core.py index 1a0e8761a6..9d92ee18f0 100644 --- a/trl/core.py +++ b/trl/core.py @@ -30,7 +30,7 @@ try: from collections.abc import Mapping except ImportError: - from collections import Mapping + from collections.abc import Mapping WANDB_PADDING = -1 @@ -80,7 +80,7 @@ def stack_dicts(stats_dicts: List[Dict]) -> Dict: def add_suffix(input_dict: Dict, suffix: str) -> Dict: """Add suffix to dict keys.""" - return dict((k + suffix, v) for k, v in input_dict.items()) + return {k + suffix: v for k, v in input_dict.items()} def pad_to_size(tensor: torch.Tensor, size: int, dim: int = 1, padding: int = 50256) -> torch.Tensor: @@ -194,7 +194,7 @@ def respond_to_batch( ) -> torch.LongTensor: """Sample text from language model.""" input_ids = queries - for i in range(txt_len): + for _i in range(txt_len): # Get Logits outputs = model(input_ids) next_token_logits = outputs[0][:, -1, :] @@ -236,7 +236,7 @@ def __call__(self) -> int: return np.random.choice(self.values) -class PPODecorators(object): +class PPODecorators: optimize_device_cache = False @classmethod diff --git a/trl/environment/base_environment.py b/trl/environment/base_environment.py index 58b61fd17f..7037166d76 100644 --- a/trl/environment/base_environment.py +++ b/trl/environment/base_environment.py @@ -46,7 +46,7 @@ def __call__(self, input_ids, scores, **kwargs): done = [] for i, decoded_generation in enumerate(decoded_generations): - sequence_complete = any([stop_string in decoded_generation for 
stop_string in self.stop_strings]) + sequence_complete = any(stop_string in decoded_generation for stop_string in self.stop_strings) done.append(sequence_complete) if not sequence_complete: self.generated_tokens[i] += 1 @@ -243,7 +243,7 @@ def __init__( if isinstance(tools, dict): self.tools = tools else: - self.tools = dict([(tool.__class__.__name__, tool) for tool in tools]) + self.tools = {tool.__class__.__name__: tool for tool in tools} self.reward_fn = reward_fn self.max_length = max_length self.request_token = "" @@ -278,7 +278,7 @@ def run(self, queries, **rewards_kwargs): histories = [TextHistory(q, qt, system=True) for q, qt in zip(queries, queries_tokens)] - while any([not history.completed for history in histories]) and turns < self.max_turns: + while any(not history.completed for history in histories) and turns < self.max_turns: histories = self.generate(histories) histories = self.tasks_end_check(histories) # TODO: make this parallel rather than for-loop diff --git a/trl/extras/best_of_n_sampler.py b/trl/extras/best_of_n_sampler.py index 1441eecd41..b400b14b18 100644 --- a/trl/extras/best_of_n_sampler.py +++ b/trl/extras/best_of_n_sampler.py @@ -7,7 +7,7 @@ from ..models import SUPPORTED_ARCHITECTURES, PreTrainedModelWrapper -class BestOfNSampler(object): +class BestOfNSampler: def __init__( self, model: PreTrainedModelWrapper, diff --git a/trl/models/modeling_base.py b/trl/models/modeling_base.py index f6d4e86bba..f7894ddedb 100644 --- a/trl/models/modeling_base.py +++ b/trl/models/modeling_base.py @@ -71,6 +71,7 @@ class PreTrainedModelWrapper(nn.Module): supported_args: (`list`) The list of arguments that are supported by the wrapper class. """ + transformers_parent_class = None supported_args = None supported_modules = ("v_head",) @@ -378,12 +379,12 @@ def _get_checkpoint_from_hub( ) # load json if is_resuming_training: - with open(index_file_name, "r") as f: + with open(index_file_name) as f: index = json.load(f) # check filename with `v_head` or any known extra module: files_to_download = set() for k, v in index["weight_map"].items(): - if any([module in k for module in cls.supported_modules]): + if any(module in k for module in cls.supported_modules): files_to_download.add(v) is_sharded = True @@ -460,7 +461,7 @@ def add_and_load_reward_modeling_adapter( "adapter_model.bin", token=token, ) - except: # noqa + except Exception: filename = os.path.join(adapter_model_id, "adapter_model.safetensors") safe_loading = True if not os.path.exists(filename): @@ -470,10 +471,11 @@ def add_and_load_reward_modeling_adapter( "adapter_model.safetensors", token=token, ) - except: # noqa + except Exception as exc: raise ValueError( - "Could not find adapter model in the Hub, make sure you have the correct adapter model id." - ) + "Could not find adapter model in the Hub, " + "make sure you have the correct adapter model id." 
+ ) from exc else: local_filename = filename else: @@ -485,7 +487,7 @@ def add_and_load_reward_modeling_adapter( adapter_state_dict = loading_func(local_filename, **load_kwargs) for score_name_candidate in cls.supported_rm_modules: - if any([score_name_candidate in name for name in adapter_state_dict.keys()]): + if any(score_name_candidate in name for name in adapter_state_dict.keys()): score_name = score_name_candidate # we have found the correct head name and can break break @@ -498,7 +500,7 @@ def add_and_load_reward_modeling_adapter( score_dict[key_name] = param.to(cls._get_current_device()) num_labels, hidden_dim = score_dict["weight"].shape - has_bias = any(["bias" in name for name in adapter_state_dict.keys()]) + has_bias = any("bias" in name for name in adapter_state_dict.keys()) score = nn.Linear(hidden_dim, num_labels, bias=has_bias).to( device=cls._get_current_device(), @@ -636,7 +638,7 @@ def create_reference_model( else: for pattern_candidate in LAYER_PATTERNS: pattern_candidate = pattern_candidate.format(layer=num_shared_layers) - if any([pattern_candidate in name for name in parameter_names]): + if any(pattern_candidate in name for name in parameter_names): pattern = pattern_candidate break @@ -648,7 +650,7 @@ def create_reference_model( unshared_param_list = [] shared_parameter = True - for name, param in model.named_parameters(): + for name, _param in model.named_parameters(): if pattern in name: shared_parameter = False if shared_parameter: @@ -661,8 +663,7 @@ def create_reference_model( param = model.get_parameter(param_name) param.requires_grad = False - ref_param = ref_model.get_parameter(param_name) # noqa - ref_param = param # noqa + _ref_param = ref_model.get_parameter(param_name) # for all other parameters just make sure they don't use gradients for param_name in unshared_param_list: diff --git a/trl/models/modeling_sd_base.py b/trl/models/modeling_sd_base.py index 954e71fff1..2cfb842408 100644 --- a/trl/models/modeling_sd_base.py +++ b/trl/models/modeling_sd_base.py @@ -34,7 +34,7 @@ @dataclass -class DDPOPipelineOutput(object): +class DDPOPipelineOutput: """ Output class for the diffusers pipeline to be finetuned with the DDPO trainer @@ -54,7 +54,7 @@ class DDPOPipelineOutput(object): @dataclass -class DDPOSchedulerOutput(object): +class DDPOSchedulerOutput: """ Output class for the diffusers scheduler to be finetuned with the DDPO trainer @@ -69,7 +69,7 @@ class DDPOSchedulerOutput(object): log_probs: torch.Tensor -class DDPOStableDiffusionPipeline(object): +class DDPOStableDiffusionPipeline: """ Main class for the diffusers pipeline to be finetuned with the DDPO trainer """ diff --git a/trl/models/modeling_value_head.py b/trl/models/modeling_value_head.py index 2771cc6ce2..457546ab51 100644 --- a/trl/models/modeling_value_head.py +++ b/trl/models/modeling_value_head.py @@ -85,6 +85,7 @@ class AutoModelForCausalLMWithValueHead(PreTrainedModelWrapper): - **"normal"** -- Initializes the weights of the `ValueHead` with a normal distribution. 
""" + transformers_parent_class = AutoModelForCausalLM lm_head_namings = ["lm_head", "embed_out"] supported_args = ( @@ -218,7 +219,7 @@ def state_dict(self, *args, **kwargs): return pretrained_model_state_dict def push_to_hub(self, *args, **kwargs): - setattr(self.pretrained_model, "v_head", self.v_head) + self.pretrained_model.v_head = self.v_head return self.pretrained_model.push_to_hub(*args, **kwargs) @@ -276,6 +277,7 @@ class AutoModelForSeq2SeqLMWithValueHead(PreTrainedModelWrapper): kwargs: Additional keyword arguments passed along to the `ValueHead` class. """ + transformers_parent_class = AutoModelForSeq2SeqLM lm_head_namings = ["lm_head", "embed_out", "output_projection"] supported_args = ( @@ -298,7 +300,7 @@ def __init__(self, pretrained_model, **kwargs): def _has_lm_head(self): # check module names of all modules inside `pretrained_model` to find the language model head - for name, module in self.pretrained_model.named_modules(): + for name, _module in self.pretrained_model.named_modules(): if any(attribute in name for attribute in self.lm_head_namings): return True return False @@ -374,7 +376,7 @@ def state_dict(self, *args, **kwargs): return pretrained_model_state_dict def push_to_hub(self, *args, **kwargs): - setattr(self.pretrained_model, "v_head", self.v_head) + self.pretrained_model.v_head = self.v_head return self.pretrained_model.push_to_hub(*args, **kwargs) diff --git a/trl/trainer/ddpo_config.py b/trl/trainer/ddpo_config.py index 3108613814..b73bd58d05 100644 --- a/trl/trainer/ddpo_config.py +++ b/trl/trainer/ddpo_config.py @@ -107,7 +107,7 @@ def to_dict(self): def __post_init__(self): if self.log_with not in ["wandb", "tensorboard"]: warnings.warn( - ("Accelerator tracking only supports image logging if `log_with` is set to 'wandb' or 'tensorboard'.") + "Accelerator tracking only supports image logging if `log_with` is set to 'wandb' or 'tensorboard'." ) if self.log_with == "wandb" and not is_torchvision_available(): diff --git a/trl/trainer/ddpo_trainer.py b/trl/trainer/ddpo_trainer.py index 9fab11120f..df219da707 100644 --- a/trl/trainer/ddpo_trainer.py +++ b/trl/trainer/ddpo_trainer.py @@ -523,7 +523,7 @@ def _train_batched_samples(self, inner_epoch, epoch, global_step, batched_sample global_step (int): The updated global step """ info = defaultdict(list) - for i, sample in enumerate(batched_samples): + for _i, sample in enumerate(batched_samples): if self.config.train_cfg: # concat negative prompts to sample prompts to avoid two forward passes embeds = torch.cat([sample["negative_prompt_embeds"], sample["prompt_embeds"]]) @@ -613,7 +613,7 @@ def create_model_card(self, path: str, model_name: Optional[str] = "TRL DDPO Mod try: user = whoami()["name"] # handle the offline case - except: # noqa + except Exception: warnings.warn("Cannot retrieve user information assuming you are running in offline mode.") return diff --git a/trl/trainer/ppo_trainer.py b/trl/trainer/ppo_trainer.py index 537f77c8d9..df6c3090e9 100644 --- a/trl/trainer/ppo_trainer.py +++ b/trl/trainer/ppo_trainer.py @@ -1315,7 +1315,7 @@ def log_stats( stats: dict, batch: dict, rewards: List[torch.FloatTensor], - columns_to_log: List[str] = ["query", "response"], + columns_to_log: typing.Iterable[str] = ("query", "response"), ): """ A function that logs all the training stats. Call it at the end of each epoch. 
@@ -1337,7 +1337,7 @@ def log_stats( if self.config.log_with == "wandb": import wandb - if any([column_to_log not in batch.keys() for column_to_log in columns_to_log]): + if any(column_to_log not in batch.keys() for column_to_log in columns_to_log): raise ValueError(f"Columns to log {columns_to_log} are not present in the batch {batch.keys()}.") batch_list = [batch[column_to_log] for column_to_log in columns_to_log] @@ -1393,7 +1393,7 @@ def create_model_card(self, path: str, model_name: Optional[str] = "TRL Model") try: user = whoami()["name"] # handle the offline case - except: # noqa + except Exception: warnings.warn("Cannot retrieve user information assuming you are running in offline mode.") return @@ -1415,7 +1415,7 @@ def _show_tokens(self, tokens, masks): text = Text() - for i, (token, mask) in enumerate(zip(tokens, masks)): + for _i, (token, mask) in enumerate(zip(tokens, masks)): if mask == 1: text.append(self.tokenizer.decode(token.item()), style="black on deep_sky_blue1") text.append(" ") diff --git a/trl/trainer/sft_trainer.py b/trl/trainer/sft_trainer.py index 9f103e31c0..46f2ad35ce 100644 --- a/trl/trainer/sft_trainer.py +++ b/trl/trainer/sft_trainer.py @@ -117,6 +117,7 @@ class SFTTrainer(Trainer): dataset_kwargs: (`Optional[Dict]`, *optional*): Dict of Optional kwargs to pass when creating packed or non-packed datasets """ + _tag_names = ["trl", "sft"] def __init__( @@ -480,17 +481,17 @@ def _prepare_packed_dataloader( ) def data_generator(constant_length_iterator): - for i in constant_length_iterator: - yield i + yield from constant_length_iterator try: packed_dataset = Dataset.from_generator( data_generator, gen_kwargs={"constant_length_iterator": constant_length_iterator} ) - except (DatasetGenerationError, SchemaInferenceError): + except (DatasetGenerationError, SchemaInferenceError) as exc: raise ValueError( - "Error occurred while packing the dataset. Make sure that your dataset has enough samples to at least yield one packed sequence." - ) + "Error occurred while packing the dataset. " + "Make sure that your dataset has enough samples to at least yield one packed sequence." + ) from exc return packed_dataset else: raise ValueError(