# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This script can be used to train and evaluate either a regular supervised model or a PET/iPET model on
one of the supported tasks and datasets.
"""
import argparse
import logging
import os

import torch

from train import train_pet
from data_utils import PROCESSORS, load_metrics

logger = logging.getLogger('cli')

parser = argparse.ArgumentParser(
    description="Command line interface for P-Tuning.")

# Required parameters
parser.add_argument("--data_dir", default=None, type=str, required=True,
                    help="The input data dir. Should contain the data files for the task.")
parser.add_argument("--model_type", default="albert", type=str, required=True,
                    help="The type of the pretrained language model to use.")
parser.add_argument("--model_name_or_path", default="albert-xxlarge-v2", type=str, required=True,
                    help="Path to the pre-trained model or shortcut name.")
parser.add_argument("--cache_dir", default="", type=str,
                    help="Where to store the pre-trained models downloaded from S3.")
parser.add_argument("--task_name", default=None, type=str, required=True, choices=PROCESSORS.keys(),
                    help="The name of the task to train/evaluate on.")
parser.add_argument("--output_dir", default=None, type=str, required=True,
                    help="The output directory where the model predictions and checkpoints will be written.")

# PET-specific optional parameters
parser.add_argument("--pattern_ids", default=[1], type=int, nargs='+',
                    help="The ids of the PVPs to be used (only for PET).")
parser.add_argument("--alpha", default=0.9999, type=float,
                    help="Weighting term for the auxiliary language modeling task (only for PET).")
parser.add_argument("--pet_repetitions", default=3, type=int,
                    help="The number of times to repeat PET training and testing with different seeds.")
parser.add_argument("--pet_max_seq_length", default=256, type=int,
                    help="The maximum total input sequence length after tokenization for PET. Sequences longer "
                         "than this will be truncated, sequences shorter will be padded.")
parser.add_argument("--pet_per_gpu_train_batch_size", default=4, type=int,
                    help="Batch size per GPU/CPU for PET training.")
parser.add_argument("--pet_per_gpu_eval_batch_size", default=8, type=int,
                    help="Batch size per GPU/CPU for PET evaluation.")
parser.add_argument('--pet_gradient_accumulation_steps', type=int, default=1,
                    help="Number of update steps to accumulate before performing a backward/update pass in PET.")
parser.add_argument("--pet_num_train_epochs", default=3, type=float,
                    help="Total number of training epochs to perform in PET.")
parser.add_argument("--pet_max_steps", default=-1, type=int,
                    help="If > 0: the total number of training steps to perform in PET. Overrides num_train_epochs.")

# Other optional parameters
parser.add_argument("--train_examples", default=-1, type=int,
                    help="The total number of train examples to use, where -1 equals all examples.")
parser.add_argument("--eval_examples", default=-1, type=int,
                    help="The total number of test examples to use, where -1 equals all examples.")
parser.add_argument("--dev_examples", default=-1, type=int,
                    help="The total number of dev examples to use, where -1 equals all examples.")
parser.add_argument("--split_examples_evenly", action='store_true',
                    help="If set, train examples are not chosen randomly but split evenly across all labels.")
parser.add_argument("--learning_rate", default=1e-5, type=float,
                    help="The initial learning rate for Adam.")
parser.add_argument("--weight_decay", default=0.1, type=float,
                    help="Weight decay to apply, if any.")
parser.add_argument("--adam_epsilon", default=1e-8, type=float,
                    help="Epsilon for the Adam optimizer.")
parser.add_argument("--max_grad_norm", default=1.0, type=float,
                    help="Max gradient norm.")
parser.add_argument("--warmup_steps", default=0, type=int,
                    help="Linear warmup over warmup_steps.")
parser.add_argument("--early_stop_epochs", default=10, type=int,
                    help="Number of epochs used as the early-stopping threshold.")
parser.add_argument("--no_cuda", action='store_true',
                    help="Avoid using CUDA even when it is available.")
parser.add_argument('--overwrite_output_dir', action='store_true',
                    help="Overwrite the content of the output directory.")
parser.add_argument('--seed', type=int, default=42,
                    help="Random seed for initialization.")
parser.add_argument('--do_train', action='store_true',
                    help="Whether to perform training.")
parser.add_argument('--do_eval', action='store_true',
                    help="Whether to perform evaluation.")
parser.add_argument("--eval_set", choices=['dev', 'test'], default='dev',
                    help="Whether to perform evaluation on the dev set or the test set.")
parser.add_argument("--embed_size", default=128, type=int, help="")
parser.add_argument('--prompt_encoder_type', type=str,
default="lstm", choices=['lstm', 'mlp', 'none', 'inner'])
parser.add_argument("--eval_every_step", default=20, type=int, help="")
# Enhanced training
parser.add_argument("--two_stage_train", action='store_true', default=False,
                    help="Whether to perform two-stage training.")
# parser.add_argument("--random_mask_rate", type=float, default=0.0,
#                     help="Whether to do random additional masking.")

if __name__ == "__main__":
    args = parser.parse_args()
    # Configure logging so the parameter dump below is actually emitted;
    # without a handler, logger.info() output would be silently dropped.
    logging.basicConfig(level=logging.INFO)
    logger.info("Parameters: {}".format(args))

    if os.path.exists(args.output_dir) and os.listdir(args.output_dir) \
            and args.do_train and not args.overwrite_output_dir:
        raise ValueError(
            "Output directory ({}) already exists and is not empty.".format(args.output_dir))
    assert args.do_train or args.do_eval, "At least one of `do_train` and `do_eval` must be true"

    # Setup CUDA / GPU
    args.device = "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu"
    # Count GPUs only when CUDA is actually in use, so per-GPU batch sizes
    # are not scaled by idle devices when --no_cuda is set.
    args.n_gpu = torch.cuda.device_count() if args.device == "cuda" else 0

    # Prepare task
    args.task_name = args.task_name.lower()
    if args.task_name not in PROCESSORS:
        raise ValueError("Task '{}' not found".format(args.task_name))
    args.label_list = PROCESSORS[args.task_name]().get_labels()
    args.metrics = load_metrics(args.task_name)

    # Start training
    train_pet(args)