train_eval_chatbot_ablation.sh

#!/bin/bash
# Export so the setting is visible to the torchrun child processes below.
export CUDA_VISIBLE_DEVICES=0,1,2,3
ABLATION=$1
if [ -z "$ABLATION" ]; then
    echo "Usage: $0 <ablation>, where <ablation> is one of ['qa', 'qa_rephrase', 'qar_correct_expl', 'qar_correct_wrong1_expl', 'qar_correct_wrong2_expl']"
    exit 1
fi
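
# Example invocation (sketch):
#   bash train_eval_chatbot_ablation.sh qa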
DIR=your_output_dir_$ABLATION
MODEL_SIZE=7
# DATASET=augmented_ablation
DATASET=ablation
LOG_PATH=logs/${MODEL_SIZE}b/$DIR
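# Note: LOG_PATH is relative, so logs land under llama-recipes/ after the cd below.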
cd llama-recipes || exit 1
mkdir -p "$LOG_PATH"
# Epoch 1: fine-tune LoRA adapters from the base model.
torchrun --nnodes 1 --nproc_per_node 4 recipes/finetuning/finetuning.py \
    --enable_fsdp --use_peft --peft_method lora \
    --model_name ../llama-2-${MODEL_SIZE}b/${MODEL_SIZE}B \
    --fsdp_config.pure_bf16 \
    --batch_size_training 12 \
    --dataset custom_dataset \
    --custom_dataset.file "preprocess_data/$DATASET/${ABLATION}.py:get_preprocessed_custom" \
    --output_dir ../llama-2-${MODEL_SIZE}b/chatbot\(finetuned_15k_$DIR\)/epoch1 \
    --num_epochs 1 \
    --save_model \
    > "${LOG_PATH}/epoch1.txt"
# Epochs 2-20: continue training, each run resuming from the previous epoch's PEFT checkpoint.
for i in $(seq 1 19)
do
    torchrun --nnodes 1 --nproc_per_node 4 recipes/finetuning/finetuning.py \
        --enable_fsdp --use_peft --peft_method lora \
        --model_name ../llama-2-${MODEL_SIZE}b/${MODEL_SIZE}B \
        --fsdp_config.pure_bf16 \
        --batch_size_training 12 \
        --dataset custom_dataset \
        --custom_dataset.file "preprocess_data/$DATASET/${ABLATION}.py:get_preprocessed_custom" \
        --output_dir ../llama-2-${MODEL_SIZE}b/chatbot\(finetuned_15k_$DIR\)/epoch$(($i+1)) \
        --from_peft_checkpoint ../llama-2-${MODEL_SIZE}b/chatbot\(finetuned_15k_$DIR\)/epoch$i \
        --num_epochs 1 \
        --save_model \
        > "${LOG_PATH}/epoch$(($i+1)).txt"
done
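
# Optional sanity check (sketch, using the output paths defined above): confirm
# that a PEFT checkpoint directory was written for each of the 20 epochs.
for i in $(seq 1 20)
do
    [ -d "../llama-2-${MODEL_SIZE}b/chatbot(finetuned_15k_$DIR)/epoch$i" ] \
        || echo "Warning: missing checkpoint for epoch $i"
done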