quick fix on docs and scripts: rename --chat_template to --chat_template_name and update dataset mixer flags

allenai · Feb 14, 2025 · f877464
1 parent d4b90a7
commit f877464
Show file tree

Hide file tree

Showing 7 changed files with 27 additions and 27 deletions.
diff --git a/docs/ai2_internal.md b/docs/ai2_internal.md
@@ -260,7 +260,7 @@ python mason.py \
     --stop_token eos \
     --temperature 1.0 \
     --ground_truths_key ground_truth \
-    --chat_template tulu \
+    --chat_template_name tulu \
     --sft_messages_key messages \
     --learning_rate 3e-7 \
     --total_episodes 200000 \
@@ -313,7 +313,7 @@ python mason.py \
     --stop_token eos \
     --temperature 1.0 \
     --ground_truths_key ground_truth \
-    --chat_template tulu \
+    --chat_template_name tulu \
     --sft_messages_key messages \
     --learning_rate 5e-7 \
     --total_episodes 1000000 \

diff --git a/docs/archived_dev_scripts/olmo2_1124.sh b/docs/archived_dev_scripts/olmo2_1124.sh
@@ -256,7 +256,7 @@ python mason.py \
     --stop_token eos \
     --temperature 1.0 \
     --ground_truths_key ground_truth \
-    --chat_template tulu \
+    --chat_template_name tulu \
     --sft_messages_key messages \
     --total_episodes 100000 \
     --penalty_reward_value -10.0 \
@@ -315,7 +315,7 @@ python mason.py \
     --stop_token eos \
     --temperature 1.0 \
     --ground_truths_key ground_truth \
-    --chat_template tulu \
+    --chat_template_name tulu \
     --sft_messages_key messages \
     --total_episodes 200000 \
     --penalty_reward_value -10.0 \
@@ -379,7 +379,7 @@ python mason.py \
     --dataset_eval_splits test_prefs \
     --model_name_or_path allenai/open_instruct_dev \
     --model_revision 1206_finetune_epoch_2_lr_1e-5_loss_type_sum__4__1733525407 \
-    --chat_template tulu \
+    --chat_template_name tulu \
     --learning_rate 3e-6 \
     --per_device_train_batch_size 1 \
     --per_device_eval_batch_size 1 \
@@ -434,7 +434,7 @@ python mason.py \
     --dataset_eval_splits test_prefs \
     --model_name_or_path allenai/open_instruct_dev \
     --model_revision 1208_bsz64_13b_finetune_epoch_2_lr_5e-6_loss_type_sum__1__1733711678 \
-    --chat_template tulu \
+    --chat_template_name tulu \
     --learning_rate 3e-6 \
     --per_device_train_batch_size 1 \
     --per_device_eval_batch_size 1 \
@@ -489,7 +489,7 @@ python mason.py \
     --stop_token eos \
     --temperature 1.0 \
     --ground_truths_key ground_truth \
-    --chat_template tulu \
+    --chat_template_name tulu \
     --sft_messages_key messages \
     --total_episodes 200000 \
     --penalty_reward_value -10.0 \
@@ -547,7 +547,7 @@ python mason.py \
     --stop_token eos \
     --temperature 1.0 \
     --ground_truths_key ground_truth \
-    --chat_template tulu \
+    --chat_template_name tulu \
     --sft_messages_key messages \
     --total_episodes 200000 \
     --penalty_reward_value -10.0 \
@@ -605,7 +605,7 @@ python mason.py \
     --stop_token eos \
     --temperature 1.0 \
     --ground_truths_key ground_truth \
-    --chat_template tulu \
+    --chat_template_name tulu \
     --sft_messages_key messages \
     --total_episodes 200000 \
     --penalty_reward_value -10.0 \
@@ -664,7 +664,7 @@ python mason.py \
     --stop_token eos \
     --temperature 1.0 \
     --ground_truths_key ground_truth \
-    --chat_template tulu \
+    --chat_template_name tulu \
     --sft_messages_key messages \
     --total_episodes 200000 \
     --penalty_reward_value -10.0 \
@@ -722,7 +722,7 @@ python mason.py \
     --stop_token eos \
     --temperature 1.0 \
     --ground_truths_key ground_truth \
-    --chat_template tulu \
+    --chat_template_name tulu \
     --sft_messages_key messages \
     --total_episodes 200000 \
     --penalty_reward_value -10.0 \
@@ -782,7 +782,7 @@ python mason.py \
     --stop_token eos \
     --temperature 1.0 \
     --ground_truths_key ground_truth \
-    --chat_template tulu \
+    --chat_template_name tulu \
     --sft_messages_key messages \
     --total_episodes 200000 \
     --penalty_reward_value -10.0 \
@@ -840,7 +840,7 @@ python mason.py \
     --stop_token eos \
     --temperature 1.0 \
     --ground_truths_key ground_truth \
-    --chat_template tulu \
+    --chat_template_name tulu \
     --sft_messages_key messages \
     --total_episodes 200000 \
     --penalty_reward_value -10.0 \
@@ -898,7 +898,7 @@ python mason.py \
     --stop_token eos \
     --temperature 1.0 \
     --ground_truths_key ground_truth \
-    --chat_template tulu \
+    --chat_template_name tulu \
     --sft_messages_key messages \
     --total_episodes 200000 \
     --penalty_reward_value -10.0 \

diff --git a/docs/archived_dev_scripts/olmoe_0125.sh b/docs/archived_dev_scripts/olmoe_0125.sh
@@ -411,7 +411,7 @@ python mason.py \
     --stop_token eos \
     --temperature 1.0 \
     --ground_truths_key ground_truth \
-    --chat_template tulu \
+    --chat_template_name tulu \
     --sft_messages_key messages \
     --learning_rate 3e-7 \
     --total_episodes 200000 \
@@ -464,7 +464,7 @@ python mason.py \
     --stop_token eos \
     --temperature 1.0 \
     --ground_truths_key ground_truth \
-    --chat_template tulu \
+    --chat_template_name tulu \
     --sft_messages_key messages \
     --learning_rate 3e-7 \
     --total_episodes 200000 \
@@ -517,7 +517,7 @@ python mason.py \
     --stop_token eos \
     --temperature 1.0 \
     --ground_truths_key ground_truth \
-    --chat_template tulu \
+    --chat_template_name tulu \
     --sft_messages_key messages \
     --learning_rate 3e-7 \
     --total_episodes 200000 \
@@ -571,7 +571,7 @@ python mason.py \
     --stop_token eos \
     --temperature 1.0 \
     --ground_truths_key ground_truth \
-    --chat_template tulu \
+    --chat_template_name tulu \
     --sft_messages_key messages \
     --learning_rate 3e-7 \
     --total_episodes 200000 \

diff --git a/docs/tulu3.md b/docs/tulu3.md
@@ -275,7 +275,7 @@ python open_instruct/ppo_vllm_thread_ray_gtrl.py \
     --stop_token eos \
     --temperature 1.0 \
     --ground_truths_key ground_truth \
-    --chat_template tulu \
+    --chat_template_name tulu \
     --sft_messages_key messages \
     --learning_rate 3e-7 \
     --total_episodes 10000000 \
@@ -312,10 +312,10 @@ Couple of notes:
 
 ```bash
 source configs/beaker_configs/ray_node_setup.sh && python open_instruct/ppo_vllm_thread_ray_gtrl.py \
-    --dataset_mixer '{"allenai/RLVR-GSM-MATH-IF-Mixed-Constraints": 1.0}' \
-    --dataset_train_splits train \
-    --dataset_eval_mixer '{"allenai/RLVR-GSM-MATH-IF-Mixed-Constraints": 128}' \
-    --dataset_eval_splits train \
+    --dataset_mixer_list allenai/RLVR-GSM-MATH-IF-Mixed-Constraints 1.0 \
+    --dataset_mixer_list_splits train \
+    --dataset_mixer_eval_list allenai/RLVR-GSM-MATH-IF-Mixed-Constraints 16 \
+    --dataset_mixer_eval_list_splits train \
     --max_token_length 2048 \
     --max_prompt_token_length 2048 \
     --response_length 2048 \
@@ -330,7 +330,7 @@ source configs/beaker_configs/ray_node_setup.sh && python open_instruct/ppo_vllm
     --stop_token eos \
     --temperature 1.0 \
     --ground_truths_key ground_truth \
-    --chat_template tulu \
+    --chat_template_name tulu \
     --sft_messages_key messages \
     --learning_rate 1e-7 \
     --total_episodes 400000 \

diff --git a/scripts/train/rlvr/grpo_mini.sh b/scripts/train/rlvr/grpo_mini.sh
@@ -1,7 +1,7 @@
 python open_instruct/grpo_vllm_thread_ray_gtrl.py \
     --dataset_mixer_list allenai/RLVR-GSM 1.0 \
     --dataset_mixer_list_splits train \
-    --dataset_mixer_eval_list allenai/RLVR-GSM 1.0 \
+    --dataset_mixer_eval_list allenai/RLVR-GSM 16 \
     --dataset_mixer_eval_list_splits train \
     --max_token_length 1023 \
     --max_prompt_token_length 1024 \

diff --git a/scripts/train/rlvr/mini.sh b/scripts/train/rlvr/mini.sh
@@ -12,7 +12,7 @@ python open_instruct/ppo_vllm_thread_ray_gtrl.py \
     --stop_token eos \
     --temperature 1.0 \
     --ground_truths_key ground_truth \
-    --chat_template tulu \
+    --chat_template_name tulu \
     --sft_messages_key messages \
     --learning_rate 3e-7 \
     --total_episodes 10000 \

diff --git a/scripts/train/rlvr/tulu_rlvr.sh b/scripts/train/rlvr/tulu_rlvr.sh
@@ -12,7 +12,7 @@ python open_instruct/ppo_vllm_thread_ray_gtrl.py \
     --stop_token eos \
     --temperature 1.0 \
     --ground_truths_key ground_truth \
-    --chat_template tulu \
+    --chat_template_name tulu \
     --sft_messages_key messages \
     --learning_rate 3e-7 \
     --total_episodes 10000000 \