From 06b322736e7f83ec2a0841cb9b87cc490a78f0c9 Mon Sep 17 00:00:00 2001
From: EC2 Default User
Date: Tue, 15 Aug 2023 23:07:51 +0000
Subject: [PATCH 1/2] fix readme bug, add finetune_multigpu.sh

---
 README.md                      | 13 ++++-----
 train/sft/finetune_multigpu.sh | 48 ++++++++++++++++++++++++++++++++++
 2 files changed, 55 insertions(+), 6 deletions(-)
 create mode 100644 train/sft/finetune_multigpu.sh

diff --git a/README.md b/README.md
index 2088a36..48f010f 100644
--- a/README.md
+++ b/README.md
@@ -308,19 +308,20 @@ docker-compose up -d --build
 | Llama2-Chinese-13b-Chat | FlagAlpha/Llama2-Chinese-13b-Chat | meta-llama/Llama-2-13b-chat-hf | [Model download](https://huggingface.co/FlagAlpha/Llama2-Chinese-13b-Chat) | Chinese-instruction-finetuned LoRA parameters merged with the base model parameters |
 
 ### Load the fine-tuned model
-Load the pretrained model parameters and the fine-tuned model parameters with [PEFT](https://github.com/huggingface/peft). In the example code below, base_model_name_or_path is the path where the pretrained model parameters are saved, and fintune_model_path is the path where the fine-tuned model parameters are saved.
+Load the pretrained model parameters and the fine-tuned model parameters with [PEFT](https://github.com/huggingface/peft). In the example code below, base_model_name_or_path is the path where the pretrained model parameters are saved, and finetune_model_path is the path where the fine-tuned model parameters are saved.
 ```python
+import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from peft import PeftModel,PeftConfig
 
-# e.g. fintune_model_path='FlagAlpha/Llama2-Chinese-7b-Chat-LoRA'
-fintune_model_path=''
-config = PeftConfig.from_pretrained(fintune_model_path)
+# e.g. finetune_model_path='FlagAlpha/Llama2-Chinese-7b-Chat-LoRA'
+finetune_model_path=''
+config = PeftConfig.from_pretrained(finetune_model_path)
 # e.g. base_model_name_or_path='meta-llama/Llama-2-7b-chat'
 tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path,use_fast=False)
 tokenizer.pad_token = tokenizer.eos_token
-model = LlamaForCausalLM.from_pretrained(config.base_model_name_or_path,device_map='auto',torch_dtype=torch.float16,load_in_8bit=True)
-model = PeftModel.from_pretrained(model, fintune_model_path, device_map={"": 0})
+model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path,device_map='auto',torch_dtype=torch.float16,load_in_8bit=True)
+model = PeftModel.from_pretrained(model, finetune_model_path, device_map={"": 0})
 model =model.eval()
 input_ids = tokenizer(['Human: 介绍一下北京\nAssistant: '], return_tensors="pt",add_special_tokens=False).input_ids.to('cuda')
 generate_input = {
diff --git a/train/sft/finetune_multigpu.sh b/train/sft/finetune_multigpu.sh
new file mode 100644
index 0000000..c5d22cd
--- /dev/null
+++ b/train/sft/finetune_multigpu.sh
@@ -0,0 +1,48 @@
+output_model=/mnt/data/zhangzheng/data/llama2/sft_7b_chat
+# Change this to your own output directory
+if [ ! -d ${output_model} ];then
+    mkdir ${output_model}
+fi
+cp ./finetune.sh ${output_model}
+deepspeed --num_gpus --master_port 29510 finetune_clm_lora.py \
+    --model_name_or_path meta-llama/Llama-2-7b-chat-hf \
+    --train_files ../../data/train_sft.csv \
+    --validation_files ../../data/dev_sft.csv \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --do_train \
+    --do_eval \
+    --use_fast_tokenizer false \
+    --output_dir ${output_model} \
+    --evaluation_strategy steps \
+    --max_eval_samples 800 \
+    --learning_rate 1e-4 \
+    --gradient_accumulation_steps 16 \
+    --num_train_epochs 10 \
+    --warmup_steps 400 \
+    --load_in_bits 4 \
+    --lora_r 8 \
+    --lora_alpha 32 \
+    --target_modules q_proj,k_proj,v_proj,o_proj,down_proj,gate_proj,up_proj \
+    --logging_dir ${output_model}/logs \
+    --logging_strategy steps \
+    --logging_steps 10 \
+    --save_strategy steps \
+    --preprocessing_num_workers 10 \
+    --save_steps 200 \
+    --eval_steps 200 \
+    --save_total_limit 2000 \
+    --seed 42 \
+    --disable_tqdm false \
+    --ddp_find_unused_parameters false \
+    --block_size 4096 \
+    --report_to tensorboard \
+    --overwrite_output_dir \
+    --deepspeed ds_config_zero2.json \
+    --ignore_data_skip true \
+    --bf16 \
+    --gradient_checkpointing \
+    --bf16_full_eval \
+    --ddp_timeout 18000000 \
+    | tee -a ${output_model}/train.log
+    
\ No newline at end of file

From 080fb50c017e8db516a51cfbd9df2d0866eb76c1 Mon Sep 17 00:00:00 2001
From: EC2 Default User
Date: Wed, 16 Aug 2023 05:39:55 +0000
Subject: [PATCH 2/2] remove multigpu script, add description to readme

---
 README.md                      |  2 +-
 train/sft/finetune_multigpu.sh | 48 ----------------------------------
 2 files changed, 1 insertion(+), 49 deletions(-)
 delete mode 100644 train/sft/finetune_multigpu.sh

diff --git a/README.md b/README.md
index 48f010f..0fb9121 100644
--- a/README.md
+++ b/README.md
@@ -295,7 +295,7 @@ docker-compose up -d --build
 
 #### Step3: Fine-tuning script
 
-We provide the script [train/sft/finetune.sh](https://github.com/FlagAlpha/Llama2-Chinese/blob/main/train/sft/finetune.sh) for fine-tuning; the model is fine-tuned by adjusting some of the script's parameters. For the fine-tuning code itself, see [train/sft/finetune_clm_lora.py](https://github.com/FlagAlpha/Llama2-Chinese/blob/main/train/sft/finetune_clm_lora.py).
+We provide the script [train/sft/finetune.sh](https://github.com/FlagAlpha/Llama2-Chinese/blob/main/train/sft/finetune.sh) for fine-tuning; the model is fine-tuned by adjusting some of the script's parameters. For the fine-tuning code itself, see [train/sft/finetune_clm_lora.py](https://github.com/FlagAlpha/Llama2-Chinese/blob/main/train/sft/finetune_clm_lora.py). Single-node multi-GPU fine-tuning can be enabled by modifying `--include localhost:0` in the script.
 
 ### Chinese fine-tuned parameters
 We fine-tuned the Llama2-Chat model on Chinese instruction datasets, giving Llama2 much stronger Chinese conversational ability. Both the LoRA parameters and the parameters merged with the base model have been uploaded to [Hugging Face](https://huggingface.co/FlagAlpha); 7B and 13B models are currently available.
diff --git a/train/sft/finetune_multigpu.sh b/train/sft/finetune_multigpu.sh
deleted file mode 100644
index c5d22cd..0000000
--- a/train/sft/finetune_multigpu.sh
+++ /dev/null
@@ -1,48 +0,0 @@
-output_model=/mnt/data/zhangzheng/data/llama2/sft_7b_chat
-# Change this to your own output directory
-if [ ! -d ${output_model} ];then
-    mkdir ${output_model}
-fi
-cp ./finetune.sh ${output_model}
-deepspeed --num_gpus --master_port 29510 finetune_clm_lora.py \
-    --model_name_or_path meta-llama/Llama-2-7b-chat-hf \
-    --train_files ../../data/train_sft.csv \
-    --validation_files ../../data/dev_sft.csv \
-    --per_device_train_batch_size 1 \
-    --per_device_eval_batch_size 1 \
-    --do_train \
-    --do_eval \
-    --use_fast_tokenizer false \
-    --output_dir ${output_model} \
-    --evaluation_strategy steps \
-    --max_eval_samples 800 \
-    --learning_rate 1e-4 \
-    --gradient_accumulation_steps 16 \
-    --num_train_epochs 10 \
-    --warmup_steps 400 \
-    --load_in_bits 4 \
-    --lora_r 8 \
-    --lora_alpha 32 \
-    --target_modules q_proj,k_proj,v_proj,o_proj,down_proj,gate_proj,up_proj \
-    --logging_dir ${output_model}/logs \
-    --logging_strategy steps \
-    --logging_steps 10 \
-    --save_strategy steps \
-    --preprocessing_num_workers 10 \
-    --save_steps 200 \
-    --eval_steps 200 \
-    --save_total_limit 2000 \
-    --seed 42 \
-    --disable_tqdm false \
-    --ddp_find_unused_parameters false \
-    --block_size 4096 \
-    --report_to tensorboard \
-    --overwrite_output_dir \
-    --deepspeed ds_config_zero2.json \
-    --ignore_data_skip true \
-    --bf16 \
-    --gradient_checkpointing \
-    --bf16_full_eval \
-    --ddp_timeout 18000000 \
-    | tee -a ${output_model}/train.log
-    
\ No newline at end of file
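
For readers of the `--include localhost:0` note added in PATCH 2/2: below is a minimal sketch, not part of the repository scripts, of how the DeepSpeed launcher line in train/sft/finetune.sh could be adapted for single-node multi-GPU fine-tuning. The GPU indices, the GPU count, and the `"$@"` pass-through for the remaining training arguments are illustrative assumptions.

```bash
#!/usr/bin/env bash
# Illustrative sketch only: single-node multi-GPU launch variants for
# finetune_clm_lora.py using the DeepSpeed launcher. GPU indices, GPU count,
# and the master port are assumptions; adjust them to your machine.

# Variant 1: pin the job to specific GPUs on this node (here GPUs 0-3),
# i.e. edit the `--include localhost:0` argument mentioned in the README.
deepspeed --include localhost:0,1,2,3 --master_port 29510 finetune_clm_lora.py \
    --deepspeed ds_config_zero2.json "$@"

# Variant 2: let the launcher take the first N visible GPUs instead;
# note that `--num_gpus` expects an integer count.
# deepspeed --num_gpus 4 --master_port 29510 finetune_clm_lora.py \
#     --deepspeed ds_config_zero2.json "$@"
```

Only the launcher flags differ between single- and multi-GPU runs; the training arguments (`--train_files`, `--lora_r`, and so on) stay the same as in the script shown in the patches above.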