-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathgenerate_distill_data.sh
72 lines (65 loc) · 2.34 KB
/
generate_distill_data.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#!/bin/bash
#SBATCH --job-name=generate_distill_data
#SBATCH -c 2
#SBATCH --time=24:00:00
#SBATCH --mem=10G
#SBATCH --output=../../jobs/%x/%j.out
#
# Runs generate_distill_data.py for a single task.
#
# Usage:
#   sbatch generate_distill_data.sh [task]
#   bash   generate_distill_data.sh [task]
#
# task is one of:
#   translation | summarization | data2text | instruction-following | "long-form QA"
# and defaults to "long-form QA" when omitted.
#
# All paths below are relative, so launch the script from the directory that
# contains generate_distill_data.py.
#
# Environment (only consulted when model_name is "gpt-4"):
#   OPENAI_API_KEY, OPENAI_API_BASE - taken from the caller's environment when
#   set there; otherwise exported empty and must be filled in before running.

# Abort on a misspelled/unset variable instead of silently passing "".
set -u

version_key="distill"
overwrite=True
model_name="ChatGPT"

if [[ "${model_name}" == "gpt-4" ]]; then
  # Keep credentials supplied by the environment rather than overwriting
  # them with empty strings.
  export OPENAI_API_KEY="${OPENAI_API_KEY:-}"
  export OPENAI_API_BASE="${OPENAI_API_BASE:-}"
  export OPENAI_API_TYPE="azure"
  export OPENAI_API_VERSION="2023-07-01-preview"
fi

# Task selection: each task has its own xgptscore mode.
task="${1:-long-form QA}"
case "${task}" in
  translation)
    xgptscore_mode="wmt_mqm"
    ;;
  summarization)
    xgptscore_mode="align_score"
    ;;
  data2text)
    xgptscore_mode="d2t"
    ;;
  instruction-following)
    xgptscore_mode="instruction_following"
    ;;
  "long-form QA")
    xgptscore_mode="longform_qa"
    ;;
  *)
    printf 'generate_distill_data.sh: unknown task: %s\n' "${task}" >&2
    printf 'expected one of: translation, summarization, data2text, instruction-following, "long-form QA"\n' >&2
    exit 2
    ;;
esac

# The data directory is named after the task (note: "long-form QA" contains a
# space, so every use of these variables must stay quoted).
input_file="../../data/synthesis_min/${task}/train_data.kb_txt.distill.syn_cand.json"

# No trailing backslash after the last argument: a dangling continuation would
# pull whatever line follows into this command.
python generate_distill_data.py \
  --task "${task}" \
  --input_file "${input_file}" \
  --xgptscore_mode "${xgptscore_mode}" \
  --version_key "${version_key}" \
  --model_name "${model_name}" \
  --overwrite "${overwrite}"