FIX / PPO: Fix enable_input_require_grads issues with PPO models #1664

Merged · 4 commits · May 24, 2024
3 changes: 3 additions & 0 deletions trl/models/modeling_base.py
@@ -100,6 +100,9 @@ def __init__(
        if hasattr(pretrained_model, "gradient_checkpointing_enable"):
            self.gradient_checkpointing_enable = pretrained_model.gradient_checkpointing_enable

        if hasattr(pretrained_model, "enable_input_require_grads"):
            self.enable_input_require_grads = pretrained_model.enable_input_require_grads

        self.supports_rm_adapter = supports_rm_adapter
        self.rm_adapter_name = rm_adapter_name
        self.policy_adapter_name = "default"
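The value-head wrapper delegates to the underlying `pretrained_model`, so without this forwarding a call to `enable_input_require_grads()` on the wrapper would not resolve, even though the wrapped transformers model supports it. A minimal sketch of the effect of the change (the model name is a placeholder, not taken from this PR):

    # Minimal sketch of the forwarding added above; "gpt2" is a placeholder model.
    from trl import AutoModelForCausalLMWithValueHead

    model = AutoModelForCausalLMWithValueHead.from_pretrained("gpt2")
    model.gradient_checkpointing_enable()  # already forwarded to model.pretrained_model
    model.enable_input_require_grads()     # forwarded by this change as well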
2 changes: 2 additions & 0 deletions trl/trainer/ppo_config.py
@@ -124,6 +124,8 @@ class PPOConfig:
"""Score clipping"""
whiten_rewards: bool = False
"""Whiten the rewards before compute advantages"""
gradient_checkpointing: bool = False
"""Enable gradient checkpointing"""

# computed hyperparameters at runtime; we use `tyro.conf.Suppress` to hide them from the help text
is_encoder_decoder: Optional[tyro.conf.Suppress[bool]] = None
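With the new field, gradient checkpointing can be requested from the config instead of being enabled manually before building the trainer. A hedged usage sketch, where the model name and batch sizes are placeholders and only `gradient_checkpointing` comes from this diff:

    # Usage sketch for the new config flag; values are placeholders.
    from trl import PPOConfig

    config = PPOConfig(
        model_name="gpt2",
        batch_size=8,
        mini_batch_size=2,
        gradient_checkpointing=True,  # new field added in this diff
    )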
12 changes: 12 additions & 0 deletions trl/trainer/ppo_trainer.py
@@ -319,6 +319,18 @@ def __init__(
            self.accelerator.state, "deepspeed_plugin"
        )

        if config.gradient_checkpointing:
            self.model.gradient_checkpointing_enable()

            if hasattr(self.model, "enable_input_require_grads"):
                self.model.enable_input_require_grads()
            else:
                # For backward compatibility with older versions of transformers
                def make_inputs_require_grad(module, input, output):
                    output.requires_grad_(True)

                self.model.pretrained_model.get_input_embeddings().register_forward_hook(make_inputs_require_grad)

        (
            self.model,
            self.optimizer,
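When `gradient_checkpointing` is set, the trainer also needs the embedding outputs to require gradients: with frozen input embeddings (for example under adapters), the re-computed checkpointed segments would otherwise have no gradient path back to the trainable parameters. A standalone sketch of the same fallback pattern on a plain transformers causal LM, assuming an older transformers version without `enable_input_require_grads` (the model name is a placeholder):

    # Standalone sketch of the backward-compatibility fallback shown above,
    # applied to a plain transformers causal LM; "gpt2" is a placeholder.
    from transformers import AutoModelForCausalLM

    model = AutoModelForCausalLM.from_pretrained("gpt2")
    model.gradient_checkpointing_enable()

    if hasattr(model, "enable_input_require_grads"):
        model.enable_input_require_grads()
    else:
        # Force the embedding outputs to require grad so gradients can flow
        # through the checkpointed blocks even if the embedding weights are frozen.
        def make_inputs_require_grad(module, input, output):
            output.requires_grad_(True)

        model.get_input_embeddings().register_forward_hook(make_inputs_require_grad)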