From 7c6b2d31f8207f32ee9d9416e306fdaba6559a6e Mon Sep 17 00:00:00 2001 From: Oleg S <97077423+RobotSail@users.noreply.github.com> Date: Thu, 7 Nov 2024 15:20:46 +0000 Subject: [PATCH] Update tests/smoketest.sh to support FSDP + LoRA as a testing path. Additionally introduce a max_seq_len parameter to support testing on lower-end hardware. Signed-off-by: Oleg S <97077423+RobotSail@users.noreply.github.com> --- tests/smoketest.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/smoketest.sh b/tests/smoketest.sh index a54c9764..6918fb03 100755 --- a/tests/smoketest.sh +++ b/tests/smoketest.sh @@ -19,6 +19,7 @@ NUM_GPUS="${2:-${DEFAULT_GPUS}}" # ############### User-modifiable parameters ############### # Change these as needed MAX_BATCH_LEN=60000 +MAX_SEQ_LEN=4096 NUM_SAMPLES_TRAINED_ON=5000 # upper-bound on training dataset size. # ############### Test Functions ############### @@ -203,17 +204,14 @@ function test_standard_loop_fsdp_lora() { --nproc_per_node="${NUM_GPUS}" \ main_ds.py \ --model_name_or_path="${MODEL_NAME}" \ - --is_granite \ --data_path="${COMPUTED_DATA_PATH}" \ --output_dir="${CHECKPOINTS_DIR}" \ --num_epochs=1 \ --effective_batch_size=128 \ --save_samples=0 \ --checkpoint_at_epoch \ - --accelerate_full_state_at_epoch \ --distributed_training_framework="${DISTRIB_FRAMEWORK}" \ --max_batch_len="${MAX_BATCH_LEN}" \ - --is_granite \ --lora_r=4 \ --lora_alpha=32 \ --lora_dropout=0.1 @@ -235,6 +233,7 @@ function main () { test_standard_loop_nongranite _cleanup_saved_checkpoints test_standard_loop + test_standard_loop_fsdp_lora } main