-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathpretrain.sh
45 lines (41 loc) · 1.22 KB
/
pretrain.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#!/usr/bin/env bash
# Pretraining launcher: runs train.py on the binarized pretraining data, mirrors
# its output into a log file, then posts a Slack notification.
#
# Variables that are not assigned in this script (PRETRAIN_CORPUS, device,
# MODELS, OUT, exp) are expected to come from ./config.sh or the calling
# environment.
source ./config.sh
DATA_BIN=out/data_bin_art/$PRETRAIN_CORPUS/data_bin_art
data_epoch=9

# tee does not create missing parent directories, so make sure the log
# directory exists before the training output is piped into it.
mkdir -p -- "$OUT/log"

# CUDA_VISIBLE_DEVICES=0,1 python train.py ${DATA_BIN}_${data_epoch} \
CUDA_VISIBLE_DEVICES=$device python train.py "${DATA_BIN}_1" \
--save-dir "$MODELS" \
--max-epoch "$data_epoch" \
--batch-size 32 \
--max-tokens 300 \
--train-subset train \
--valid-subset valid \
--arch transformer \
--clip-norm 2 \
--lr 0.002 \
--min-lr 1e-4 \
--lr-shrink 0.999 \
--validate-interval 10 \
--dropout 0.2 \
--relu-dropout 0.2 \
--attention-dropout 0.2 \
--copy-attention-dropout 0.2 \
--encoder-embed-dim 512 \
--decoder-embed-dim 512 \
--max-target-positions 1024 \
--max-source-positions 1024 \
--encoder-ffn-embed-dim 4096 \
--decoder-ffn-embed-dim 4096 \
--encoder-attention-heads 8 \
--decoder-attention-heads 8 \
--share-all-embeddings \
--no-progress-bar \
--log-interval 1000 \
--no-ema \
--skip-invalid-size-inputs-valid-test \
--copy-attention \
--copy-attention-heads 1 \
--positive-label-weight 3.0 \
| tee "$OUT/log/log${exp}_${data_epoch}.out"
# ^ Braces around "exp" are required: a bare "$exp_" is parsed as a variable
#   named "exp_", which would drop the experiment tag from the log file name.
#   NOTE(review): assumes the tag variable is "exp" — confirm against config.sh.

# NOTE: this runs unconditionally; the pipeline's status is tee's, so the
# message is sent even when train.py exits with an error.
python /lab/ogawa/scripts/slack/send_slack_message.py -m "Finish pretraining: [$data_epoch] $PRETRAIN_CORPUS"