
Commit

Add config file for TopWK+Posterior
wangxinyu0922 committed Feb 18, 2021
1 parent 7b65a46 commit aeb059e
Showing 3 changed files with 122 additions and 0 deletions.
7 changes: 7 additions & 0 deletions README.md
@@ -66,6 +66,13 @@ python train_with_teacher.py --config config/multi_bert_300epoch_0.5anneal_2000b

---

**Posterior+Top-WK distillation**
```
python train_with_teacher.py --config config/multi_bert_300epoch_0.5anneal_2000batch_0.1lr_600hidden_multilingual_crf_sentloss_10patience_distill_fast_crfatt_posterior_4temperature_both_old_relearn_nodev_fast_new_ner1.yaml
```

---

### Training the Multilingual Model with M-BERT finetuning

#### Finetuning M-BERT **without** the CRF layer
63 changes: 63 additions & 0 deletions config/multi_bert_300epoch_0.5anneal_2000batch_0.1lr_600hidden_multilingual_crf_sentloss_10patience_distill_fast_crfatt_posterior_4temperature_both_old_relearn_nodev_fast_new_ner1.yaml
@@ -0,0 +1,63 @@
```yaml
MFVI:
  hexa_rank: 150
  hexa_std: 1
  iterations: 3
  normalize_weight: true
  quad_rank: 150
  quad_std: 1
  tag_dim: 150
  use_hexalinear: false
  use_quadrilinear: false
  use_second_order: false
  use_third_order: false
  window_size: 1
ModelDistiller:
  distill_mode: true
  optimizer: SGD
  train_with_professor: false
anneal_factor: 0.5
embeddings:
  BertEmbeddings:
    bert_model_or_path: bert-base-multilingual-cased
    layers: '-1'
    pooling_operation: mean
interpolation: 0.5
is_teacher_list: true
model:
  FastSequenceTagger:
    crf_attention: true
    distill_crf: true
    distill_posterior: true
    dropout: 0.0
    hidden_size: 600
    relearn_embeddings: true
    sentence_loss: true
    temperature: 4.0
    use_crf: true
model_name: multi_bert_300epoch_0.5anneal_2000batch_0.1lr_600hidden_multilingual_crf_sentloss_10patience_distill_fast_crfatt_posterior_4temperature_both_old_relearn_nodev_fast_new_ner1
ner:
  Corpus: CONLL_03_DUTCH:CONLL_03_SPANISH:CONLL_03:CONLL_03_GERMAN
  tag_dictionary: resources/taggers/ner_tags.pkl
  teachers:
    ? config_gen/multi_bert_origflair_300epoch_2000batch_1lr_256hidden_de_monolingual_crf_sentloss_10patience_baseline_fast_nodev_ner12.yaml
    : CONLL_03_GERMAN
    ? config_gen/multi_bert_origflair_300epoch_2000batch_1lr_256hidden_en_monolingual_crf_sentloss_10patience_baseline_fast_nodev_ner11.yaml
    : CONLL_03
    ? config_gen/multi_bert_origflair_300epoch_2000batch_1lr_256hidden_es_monolingual_crf_sentloss_10patience_baseline_fast_nodev_ner12.yaml
    : CONLL_03_SPANISH
    ? config_gen/multi_bert_origflair_300epoch_2000batch_1lr_256hidden_nl_monolingual_crf_sentloss_10patience_baseline_fast_nodev_ner11.yaml
    : CONLL_03_DUTCH
target_dir: resources/taggers/
targets: ner
teacher_annealing: true
train:
  learning_rate: 0.1
  max_epochs: 300
  mini_batch_size: 2000
  monitor_test: false
  patience: 10
  professor_interpolation: 0.5
  save_final_model: false
  train_with_dev: false
  true_reshuffle: false
trainer: ModelDistiller
```
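
The config above sets `temperature: 4.0`, `interpolation: 0.5`, and `distill_posterior: true`. As a rough, generic sketch of how a temperature-softened distillation term is usually interpolated with the supervised loss (this illustrates the standard knowledge-distillation recipe, not this repository's exact implementation; the function and tensor names are hypothetical):

```python
import torch.nn.functional as F

def interpolated_distill_loss(student_logits, teacher_logits, gold_loss,
                              temperature=4.0, interpolation=0.5):
    """Blend a temperature-softened teacher/student KL term with the gold loss.

    student_logits, teacher_logits: per-token emission scores of shape
    (batch, seq_len, num_tags); gold_loss: the usual supervised loss value.
    """
    # Soften both distributions with the same temperature.
    student_log_probs = F.log_softmax(student_logits / temperature, dim=-1)
    teacher_probs = F.softmax(teacher_logits / temperature, dim=-1)

    # KL(teacher || student); the T^2 factor keeps gradient magnitudes
    # comparable across temperatures (standard knowledge-distillation practice).
    distill_term = F.kl_div(student_log_probs, teacher_probs,
                            reduction="batchmean") * temperature ** 2

    # interpolation: 0.5 weights the supervised and distillation terms equally.
    return interpolation * gold_loss + (1.0 - interpolation) * distill_term
```

With `interpolation: 0.5` the two terms are weighted equally; a higher temperature flattens the teacher distribution so more of its relative tag preferences carry over to the student.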
52 changes: 52 additions & 0 deletions config/test_de.yaml
@@ -0,0 +1,52 @@
```yaml
ModelDistiller:
  distill_mode: false
  train_with_professor: false
anneal_factor: 2
embeddings:
  TransformerWordEmbeddings:
    model: bert-base-multilingual-cased
    layers: '-1'
  FlairEmbeddings-1:
    model: de-forward
  FlairEmbeddings-2:
    model: de-backward
  WordEmbeddings:
    embeddings: de
interpolation: 0.5
is_teacher_list: true
model:
  SequenceTagger:
    hidden_size: 256
    sentence_loss: true
    use_crf: true
model_name: multi_bert_origflair_300epoch_2000batch_1lr_256hidden_de_monolingual_crf_sentloss_10patience_baseline_nodev_ner0
ner:
  Corpus: CONLL_03_GERMAN
  professors:
    config/single-de-ner.yaml: CONLL_03_GERMAN
    config/single-en-ner.yaml: CONLL_03
    config/single-es-ner.yaml: CONLL_03_SPANISH
    config/single-nl-ner.yaml: CONLL_03_DUTCH
  tag_dictionary: resources/taggers/ner_tags.pkl
  teachers:
    config/multi_bert_flair_2000batch_1lr_de_monolingual_nocrf_sentloss_10patience_baseline_nodev_ner1.yaml: CONLL_03_GERMAN
    config/multi_bert_flair_2000batch_1lr_en_monolingual_nocrf_sentloss_10patience_baseline_nodev_ner1.yaml: CONLL_03
    config/multi_bert_flair_2000batch_1lr_es_monolingual_nocrf_sentloss_10patience_baseline_nodev_ner0.yaml: CONLL_03_SPANISH
    config/multi_bert_flair_2000batch_1lr_nl_monolingual_nocrf_sentloss_10patience_baseline_nodev_ner1.yaml: CONLL_03_DUTCH
target_dir: resources/taggers/
targets: ner
teacher_annealing: false
train:
  learning_rate: 0.1
  max_epochs: 300
  mini_batch_size: 2000
  monitor_test: false
  patience: 10
  professor_interpolation: 0.5
  save_final_model: false
  train_with_dev: false
upos:
  Corpus: UD_GERMAN:UD_ENGLISH:UD_FRENCH:UD_ITALIAN:UD_DUTCH:UD_SPANISH:UD_PORTUGUESE:UD_CHINESE
  UD_GERMAN:
    train_config: config/
  tag_dictionary: resources/taggers/pos_tags.pkl
```
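
The `teachers` entries in the distillation config above are written with YAML's explicit-key syntax (`?` introduces the key, `:` the value), which is simply how long keys get serialized; they load as an ordinary path-to-corpus mapping. Below is a minimal sketch of reading that config with the standard `pyyaml` package and printing the mapping; the path is the one from the README command, and nothing here is specific to this repository's own loader:

```python
import yaml  # standard pyyaml package

# Config path from the README command above.
config_path = "config/multi_bert_300epoch_0.5anneal_2000batch_0.1lr_600hidden_multilingual_crf_sentloss_10patience_distill_fast_crfatt_posterior_4temperature_both_old_relearn_nodev_fast_new_ner1.yaml"

with open(config_path) as f:
    config = yaml.safe_load(f)

# The explicit "? path / : corpus" pairs load as plain dict entries,
# mapping each monolingual teacher config to the corpus it covers.
for teacher_path, corpus in config["ner"]["teachers"].items():
    print(f"{corpus:<20} <- {teacher_path}")

# The trainer entry names the class that consumes the rest of the config.
print(config["trainer"])  # ModelDistiller
```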
