-
Notifications
You must be signed in to change notification settings - Fork 16
/
Copy pathcoref.conf
94 lines (71 loc) · 1.73 KB
/
coref.conf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# Shared defaults for the coreference task. Every model variant defined in
# this file extends this object (directly or transitively) and overrides only
# the keys that differ.
base_config {
task = "coref"
dataset = "ontonotes_coref"
# ${ASP} is not defined anywhere in this file, so it has to be supplied from
# outside (presumably an environment variable -- confirm against the launcher).
# data_dir and model_dir resolve to the same directory.
data_dir = ${ASP}/data/ontonotes_coref/
model_dir = ${ASP}/data/ontonotes_coref/
# NOTE(review): absolute, machine-specific path, unlike the ${ASP}-relative
# directories above; presumably must be edited before running elsewhere.
log_root = /cluster/home/tialiu/tianyu/gen_coref_dump/coref/
# Computation limits.
max_segment_len = 2048
# Learning
# Learning rate and scheduler are configured separately for the plm_* and
# task_* groups (presumably pretrained-LM vs. task-specific parameters --
# confirm in the training code).
use_amp = true
optimizer = "adamw"
plm_learning_rate = 3e-5
task_learning_rate = 3e-4
plm_scheduler = "linear_with_warmup"
task_scheduler = "linear_with_warmup"
warmup_ratio = 0.1
adam_eps = 1e-8
adam_weight_decay = 0.1
max_grad_norm = 1 # Set 0 to disable clipping
batch_size = 1
gradient_accumulation_steps = 1
num_epochs = 40
# Model hyperparameters.
activation = "relu"
init_std = 0.02
dropout_rate = 0.3
feature_emb_size = 20
hidden_size = 2048
# Other.
beam_size = 1
plm_pretrained_name_or_path = t5-base
# NOTE(review): the tokenizer is t5-small while the default model is t5-base,
# and no variant in this file overrides plm_tokenizer_name. Presumably
# intentional (shared vocabulary across checkpoints) -- confirm, in particular
# for the T0 and Flan-T5 variants.
plm_tokenizer_name = t5-small
# Presumably counted in training steps -- confirm in the training loop.
eval_frequency = 2000
report_frequency = 100
}
## base ##
# T5-base variant: base_config merged with the overrides below.
# Only plm_learning_rate (3e-5 -> 5e-5) and eval_frequency (2000 -> 3000)
# differ from base_config; task_learning_rate, hidden_size and
# plm_pretrained_name_or_path restate the inherited values.
# All other variants in this file derive from this object.
t5_base = ${base_config}{
plm_learning_rate = 5e-5
task_learning_rate = 3e-4
hidden_size = 2048
plm_pretrained_name_or_path = t5-base
eval_frequency = 3000
}
# Same settings as t5_base; only the pretrained checkpoint is replaced.
flant5_base = ${t5_base}{
plm_pretrained_name_or_path = google/flan-t5-base
}
## large ##
# Same settings as t5_base; only the pretrained checkpoint is replaced.
# NOTE(review): hidden_size stays at the inherited 2048 here, whereas t5_3b
# raises it to 4096 -- confirm that is intended for the large checkpoint.
t5_large = ${t5_base}{
plm_pretrained_name_or_path = t5-large
}
# Same settings as t5_large; only the pretrained checkpoint is replaced.
flant5_large = ${t5_large}{
plm_pretrained_name_or_path = google/flan-t5-large
}
## 3b ##
# T5-3B variant, derived from t5_base.
# Effective changes relative to t5_base: plm_learning_rate (5e-5 -> 3e-5),
# hidden_size (2048 -> 4096) and the checkpoint name. task_learning_rate and
# eval_frequency restate the values already inherited from t5_base.
t5_3b = ${t5_base}{
plm_learning_rate = 3e-5
task_learning_rate = 3e-4
hidden_size = 4096
plm_pretrained_name_or_path = t5-3b
eval_frequency = 3000
}
# Same settings as t5_3b; only the pretrained checkpoint is replaced.
t0_3b = ${t5_3b}{
plm_pretrained_name_or_path = bigscience/T0_3B
}
# Same settings as t5_3b; only the pretrained checkpoint is replaced.
flant5_xl = ${t5_3b}{
plm_pretrained_name_or_path = google/flan-t5-xl
}
## 11b ##
# Derived from flant5_xl, so it inherits the t5_3b settings unchanged
# (hidden_size = 4096, plm_learning_rate = 3e-5); only the checkpoint differs.
# NOTE(review): no size-specific overrides for this larger checkpoint --
# confirm the 3B-tier hyperparameters are intended here.
flant5_xxl = ${flant5_xl}{
plm_pretrained_name_or_path = google/flan-t5-xxl
}