Skip to content

Commit

Permalink
quick fix on docs
Browse files Browse the repository at this point in the history
  • Loading branch information
vwxyzjn committed Feb 14, 2025
1 parent d4b90a7 commit f877464
Show file tree
Hide file tree
Showing 7 changed files with 27 additions and 27 deletions.
4 changes: 2 additions & 2 deletions docs/ai2_internal.md
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ python mason.py \
--stop_token eos \
--temperature 1.0 \
--ground_truths_key ground_truth \
- --chat_template tulu \
+ --chat_template_name tulu \
--sft_messages_key messages \
--learning_rate 3e-7 \
--total_episodes 200000 \
Expand Down Expand Up @@ -313,7 +313,7 @@ python mason.py \
--stop_token eos \
--temperature 1.0 \
--ground_truths_key ground_truth \
- --chat_template tulu \
+ --chat_template_name tulu \
--sft_messages_key messages \
--learning_rate 5e-7 \
--total_episodes 1000000 \
Expand Down
24 changes: 12 additions & 12 deletions docs/archived_dev_scripts/olmo2_1124.sh
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ python mason.py \
--stop_token eos \
--temperature 1.0 \
--ground_truths_key ground_truth \
- --chat_template tulu \
+ --chat_template_name tulu \
--sft_messages_key messages \
--total_episodes 100000 \
--penalty_reward_value -10.0 \
Expand Down Expand Up @@ -315,7 +315,7 @@ python mason.py \
--stop_token eos \
--temperature 1.0 \
--ground_truths_key ground_truth \
- --chat_template tulu \
+ --chat_template_name tulu \
--sft_messages_key messages \
--total_episodes 200000 \
--penalty_reward_value -10.0 \
Expand Down Expand Up @@ -379,7 +379,7 @@ python mason.py \
--dataset_eval_splits test_prefs \
--model_name_or_path allenai/open_instruct_dev \
--model_revision 1206_finetune_epoch_2_lr_1e-5_loss_type_sum__4__1733525407 \
- --chat_template tulu \
+ --chat_template_name tulu \
--learning_rate 3e-6 \
--per_device_train_batch_size 1 \
--per_device_eval_batch_size 1 \
Expand Down Expand Up @@ -434,7 +434,7 @@ python mason.py \
--dataset_eval_splits test_prefs \
--model_name_or_path allenai/open_instruct_dev \
--model_revision 1208_bsz64_13b_finetune_epoch_2_lr_5e-6_loss_type_sum__1__1733711678 \
- --chat_template tulu \
+ --chat_template_name tulu \
--learning_rate 3e-6 \
--per_device_train_batch_size 1 \
--per_device_eval_batch_size 1 \
Expand Down Expand Up @@ -489,7 +489,7 @@ python mason.py \
--stop_token eos \
--temperature 1.0 \
--ground_truths_key ground_truth \
- --chat_template tulu \
+ --chat_template_name tulu \
--sft_messages_key messages \
--total_episodes 200000 \
--penalty_reward_value -10.0 \
Expand Down Expand Up @@ -547,7 +547,7 @@ python mason.py \
--stop_token eos \
--temperature 1.0 \
--ground_truths_key ground_truth \
- --chat_template tulu \
+ --chat_template_name tulu \
--sft_messages_key messages \
--total_episodes 200000 \
--penalty_reward_value -10.0 \
Expand Down Expand Up @@ -605,7 +605,7 @@ python mason.py \
--stop_token eos \
--temperature 1.0 \
--ground_truths_key ground_truth \
- --chat_template tulu \
+ --chat_template_name tulu \
--sft_messages_key messages \
--total_episodes 200000 \
--penalty_reward_value -10.0 \
Expand Down Expand Up @@ -664,7 +664,7 @@ python mason.py \
--stop_token eos \
--temperature 1.0 \
--ground_truths_key ground_truth \
- --chat_template tulu \
+ --chat_template_name tulu \
--sft_messages_key messages \
--total_episodes 200000 \
--penalty_reward_value -10.0 \
Expand Down Expand Up @@ -722,7 +722,7 @@ python mason.py \
--stop_token eos \
--temperature 1.0 \
--ground_truths_key ground_truth \
- --chat_template tulu \
+ --chat_template_name tulu \
--sft_messages_key messages \
--total_episodes 200000 \
--penalty_reward_value -10.0 \
Expand Down Expand Up @@ -782,7 +782,7 @@ python mason.py \
--stop_token eos \
--temperature 1.0 \
--ground_truths_key ground_truth \
- --chat_template tulu \
+ --chat_template_name tulu \
--sft_messages_key messages \
--total_episodes 200000 \
--penalty_reward_value -10.0 \
Expand Down Expand Up @@ -840,7 +840,7 @@ python mason.py \
--stop_token eos \
--temperature 1.0 \
--ground_truths_key ground_truth \
- --chat_template tulu \
+ --chat_template_name tulu \
--sft_messages_key messages \
--total_episodes 200000 \
--penalty_reward_value -10.0 \
Expand Down Expand Up @@ -898,7 +898,7 @@ python mason.py \
--stop_token eos \
--temperature 1.0 \
--ground_truths_key ground_truth \
- --chat_template tulu \
+ --chat_template_name tulu \
--sft_messages_key messages \
--total_episodes 200000 \
--penalty_reward_value -10.0 \
Expand Down
8 changes: 4 additions & 4 deletions docs/archived_dev_scripts/olmoe_0125.sh
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,7 @@ python mason.py \
--stop_token eos \
--temperature 1.0 \
--ground_truths_key ground_truth \
- --chat_template tulu \
+ --chat_template_name tulu \
--sft_messages_key messages \
--learning_rate 3e-7 \
--total_episodes 200000 \
Expand Down Expand Up @@ -464,7 +464,7 @@ python mason.py \
--stop_token eos \
--temperature 1.0 \
--ground_truths_key ground_truth \
- --chat_template tulu \
+ --chat_template_name tulu \
--sft_messages_key messages \
--learning_rate 3e-7 \
--total_episodes 200000 \
Expand Down Expand Up @@ -517,7 +517,7 @@ python mason.py \
--stop_token eos \
--temperature 1.0 \
--ground_truths_key ground_truth \
- --chat_template tulu \
+ --chat_template_name tulu \
--sft_messages_key messages \
--learning_rate 3e-7 \
--total_episodes 200000 \
Expand Down Expand Up @@ -571,7 +571,7 @@ python mason.py \
--stop_token eos \
--temperature 1.0 \
--ground_truths_key ground_truth \
- --chat_template tulu \
+ --chat_template_name tulu \
--sft_messages_key messages \
--learning_rate 3e-7 \
--total_episodes 200000 \
Expand Down
12 changes: 6 additions & 6 deletions docs/tulu3.md
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ python open_instruct/ppo_vllm_thread_ray_gtrl.py \
--stop_token eos \
--temperature 1.0 \
--ground_truths_key ground_truth \
- --chat_template tulu \
+ --chat_template_name tulu \
--sft_messages_key messages \
--learning_rate 3e-7 \
--total_episodes 10000000 \
Expand Down Expand Up @@ -312,10 +312,10 @@ Couple of notes:

```bash
source configs/beaker_configs/ray_node_setup.sh && python open_instruct/ppo_vllm_thread_ray_gtrl.py \
- --dataset_mixer '{"allenai/RLVR-GSM-MATH-IF-Mixed-Constraints": 1.0}' \
- --dataset_train_splits train \
- --dataset_eval_mixer '{"allenai/RLVR-GSM-MATH-IF-Mixed-Constraints": 128}' \
- --dataset_eval_splits train \
+ --dataset_mixer_list allenai/RLVR-GSM-MATH-IF-Mixed-Constraints 1.0 \
+ --dataset_mixer_list_splits train \
+ --dataset_mixer_eval_list allenai/RLVR-GSM-MATH-IF-Mixed-Constraints 16 \
+ --dataset_mixer_eval_list_splits train \
--max_token_length 2048 \
--max_prompt_token_length 2048 \
--response_length 2048 \
Expand All @@ -330,7 +330,7 @@ source configs/beaker_configs/ray_node_setup.sh && python open_instruct/ppo_vllm
--stop_token eos \
--temperature 1.0 \
--ground_truths_key ground_truth \
- --chat_template tulu \
+ --chat_template_name tulu \
--sft_messages_key messages \
--learning_rate 1e-7 \
--total_episodes 400000 \
Expand Down
2 changes: 1 addition & 1 deletion scripts/train/rlvr/grpo_mini.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
python open_instruct/grpo_vllm_thread_ray_gtrl.py \
--dataset_mixer_list allenai/RLVR-GSM 1.0 \
--dataset_mixer_list_splits train \
- --dataset_mixer_eval_list allenai/RLVR-GSM 1.0 \
+ --dataset_mixer_eval_list allenai/RLVR-GSM 16 \
--dataset_mixer_eval_list_splits train \
--max_token_length 1023 \
--max_prompt_token_length 1024 \
Expand Down
2 changes: 1 addition & 1 deletion scripts/train/rlvr/mini.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ python open_instruct/ppo_vllm_thread_ray_gtrl.py \
--stop_token eos \
--temperature 1.0 \
--ground_truths_key ground_truth \
- --chat_template tulu \
+ --chat_template_name tulu \
--sft_messages_key messages \
--learning_rate 3e-7 \
--total_episodes 10000 \
Expand Down
2 changes: 1 addition & 1 deletion scripts/train/rlvr/tulu_rlvr.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ python open_instruct/ppo_vllm_thread_ray_gtrl.py \
--stop_token eos \
--temperature 1.0 \
--ground_truths_key ground_truth \
- --chat_template tulu \
+ --chat_template_name tulu \
--sft_messages_key messages \
--learning_rate 3e-7 \
--total_episodes 10000000 \
Expand Down

0 comments on commit f877464

Please sign in to comment.