# configs/ere.conf — HOCON experiment configs for the "ere" task (conll04 and ace05 datasets)

# Shared defaults for the conll04 experiments. The model-specific configs in
# this file extend this object and override individual keys.
base_config_conll04 {
  # --- Task identity ---
  task = "ere"
  dataset = "conll04"

  # --- Paths ---
  # All three locations resolve to the same directory under ${ASP}
  # (ASP is not defined in this part of the file).
  data_dir = ${ASP}/data/conll04_ere/
  model_dir = ${ASP}/data/conll04_ere/
  log_root = ${ASP}/data/conll04_ere/

  # --- Input / tokenization ---
  plm_tokenizer_name = "t5-small"
  max_segment_len = 256

  # --- Label inventory (number of types) ---
  num_typing_classes = 4
  num_linking_classes = 5

  # --- Model hyperparameters ---
  activation = "relu"
  dropout_rate = 0.3
  feature_emb_size = 20
  # NOTE(review): 10x the ace05 value (150) used elsewhere in this file — confirm intentional.
  hidden_size = 1500
  init_std = 0.02

  # --- Optimization ---
  use_amp = true
  optimizer = "adamw"
  adam_eps = 1e-8
  adam_weight_decay = 0.1
  plm_learning_rate = 3e-5
  task_learning_rate = 3e-4
  # Set 0 to disable clipping
  max_grad_norm = 1

  # --- Learning-rate schedule ---
  # Scheduler options: constant / constant_with_warmup / linear_with_warmup
  plm_scheduler = "linear_with_warmup"
  task_scheduler = "linear_with_warmup"
  warmup_ratio = 0.05

  # --- Training loop ---
  num_epochs = 200
  batch_size = 8
  gradient_accumulation_steps = 1

  # --- Decoding, evaluation and logging ---
  beam_size = 1
  eval_frequency = 500
  report_frequency = 20
}

# conll04 with the t5-base checkpoint: extends base_config_conll04 and
# overrides both learning rates.
t5_base_conll04 = ${base_config_conll04} {
  plm_pretrained_name_or_path = "t5-base"

  plm_learning_rate = 5e-5
  task_learning_rate = 1e-4
}
# conll04 with the google/flan-t5-base checkpoint: extends t5_base_conll04.
# The two learning rates restate the values already set by the parent.
flant5_base_conll04 = ${t5_base_conll04} {
  plm_pretrained_name_or_path = "google/flan-t5-base"

  plm_learning_rate = 5e-5
  task_learning_rate = 1e-4
}

# conll04 with the t5-large checkpoint: extends base_config_conll04 and
# sets both learning rates explicitly.
t5_large_conll04 = ${base_config_conll04} {
  plm_pretrained_name_or_path = "t5-large"

  plm_learning_rate = 5e-5
  task_learning_rate = 3e-4
}
# Same settings as t5_large_conll04, pointing at the google/flan-t5-large checkpoint.
flant5_large_conll04 = ${t5_large_conll04} {
  plm_pretrained_name_or_path = "google/flan-t5-large"
}

# conll04 with the t5-3b checkpoint: extends t5_large_conll04 and lowers the
# PLM learning rate; the task learning rate restates the parent's value.
t5_3b_conll04 = ${t5_large_conll04} {
  plm_pretrained_name_or_path = "t5-3b"

  plm_learning_rate = 3e-5
  task_learning_rate = 3e-4
}
# Same settings as t5_3b_conll04, pointing at the google/flan-t5-xl checkpoint.
flant5_xl_conll04 = ${t5_3b_conll04} {
  plm_pretrained_name_or_path = "google/flan-t5-xl"
}
# Same settings as t5_3b_conll04, pointing at the bigscience/T0_3B checkpoint.
t0_3b_conll04 = ${t5_3b_conll04} {
  plm_pretrained_name_or_path = "bigscience/T0_3B"
}


# Shared defaults for the ace05 experiments. The model-specific configs in
# this file extend this object and override individual keys.
base_config_ace05 {
  # --- Task identity ---
  task = "ere"
  dataset = "ace05"

  # --- Paths ---
  # All three locations resolve to the same directory under ${ASP}
  # (ASP is not defined in this part of the file).
  data_dir = ${ASP}/data/ace05_ere/
  model_dir = ${ASP}/data/ace05_ere/
  log_root = ${ASP}/data/ace05_ere/

  # --- Input / tokenization ---
  plm_tokenizer_name = "t5-small"
  max_segment_len = 256

  # --- Label inventory (number of types) ---
  num_typing_classes = 7
  num_linking_classes = 6

  # --- Model hyperparameters ---
  activation = "relu"
  dropout_rate = 0.3
  feature_emb_size = 20
  hidden_size = 150
  init_std = 0.02
  rb_action_logit = false

  # --- Optimization ---
  use_amp = true
  optimizer = "adamw"
  adam_eps = 1e-8
  adam_weight_decay = 0.1
  plm_learning_rate = 5e-5
  task_learning_rate = 1e-4
  lr_weight = 1.0
  # Set 0 to disable clipping
  max_grad_norm = 1

  # --- Learning-rate schedule ---
  # Scheduler options: constant / constant_with_warmup / linear_with_warmup
  plm_scheduler = "linear_with_warmup"
  task_scheduler = "linear_with_warmup"
  warmup_ratio = 0.1

  # --- Training loop ---
  num_epochs = 100
  batch_size = 8
  gradient_accumulation_steps = 1

  # --- Decoding, evaluation and logging ---
  beam_size = 1
  eval_frequency = 3000
  report_frequency = 100
  # NOTE(review): unquoted `none` is read as the string "none", not null —
  # confirm that is what the consumer of this key expects.
  conll_eval_path = none
}

# ace05 with the t5-base checkpoint: extends base_config_ace05, raising the
# task learning rate and evaluating more often. The remaining keys restate
# the base values explicitly.
t5_base_ace05 = ${base_config_ace05} {
  plm_pretrained_name_or_path = "t5-base"

  plm_learning_rate = 5e-5
  task_learning_rate = 3e-4
  warmup_ratio = 0.1

  hidden_size = 150
  eval_frequency = 1000
}

# Same settings as t5_base_ace05, pointing at the google/flan-t5-base checkpoint.
flant5_base_ace05 = ${t5_base_ace05} {
  plm_pretrained_name_or_path = "google/flan-t5-base"
}

# ace05 with the t5-large checkpoint: extends base_config_ace05 with a higher
# task learning rate, a shorter warmup ratio and more frequent evaluation.
t5_large_ace05 = ${base_config_ace05} {
  plm_pretrained_name_or_path = "t5-large"

  plm_learning_rate = 5e-5
  task_learning_rate = 3e-4
  warmup_ratio = 0.05

  hidden_size = 150
  eval_frequency = 1000
}
# Same settings as t5_large_ace05, pointing at the google/flan-t5-large checkpoint.
flant5_large_ace05 = ${t5_large_ace05} {
  plm_pretrained_name_or_path = "google/flan-t5-large"
}

# ace05 with the t5-3b checkpoint: extends base_config_ace05 with a higher
# task learning rate, a shorter warmup ratio and more frequent evaluation.
# Also the parent of the other ace05 configs for larger checkpoints in this file.
t5_3b_ace05 = ${base_config_ace05} {
  plm_pretrained_name_or_path = "t5-3b"

  plm_learning_rate = 5e-5
  task_learning_rate = 3e-4
  warmup_ratio = 0.05

  hidden_size = 150
  eval_frequency = 1000
}


# Same settings as t5_3b_ace05, pointing at the bigscience/T0_3B checkpoint.
t0_3b_ace05 = ${t5_3b_ace05} {
  plm_pretrained_name_or_path = "bigscience/T0_3B"
}

# Same settings as t5_3b_ace05, pointing at the google/flan-t5-xl checkpoint.
flant5_xl_ace05 = ${t5_3b_ace05} {
  plm_pretrained_name_or_path = "google/flan-t5-xl"
}

# Same settings as t5_3b_ace05, pointing at the t5-11b checkpoint.
t5_11b_ace05 = ${t5_3b_ace05} {
  plm_pretrained_name_or_path = "t5-11b"
}

# ace05 with the Flan-T5 XXL checkpoint: extends t5_3b_ace05 and evaluates
# every 2000 steps instead of the parent's 1000.
flant5_xxl_ace05 = ${t5_3b_ace05}{
  # Fixed: the checkpoint id previously used the `bigscience/` organization.
  # Flan-T5 checkpoints are published under `google/`, as with every other
  # Flan-T5 entry in this file.
  plm_pretrained_name_or_path = google/flan-t5-xxl
  eval_frequency = 2000
}