-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmodel.txt
61 lines (61 loc) · 2.77 KB
/
model.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
BLCUParser(
(bert): BertModel(
(embeddings): BertEmbeddings(
(word_embeddings): Embedding(21128, 768, padding_idx=0)
(position_embeddings): Embedding(512, 768)
(token_type_embeddings): Embedding(2, 768)
(LayerNorm): BertLayerNorm()
(dropout): Dropout(p=0.1, inplace=False))
(encoder): BertEncoder(
(layer): ModuleList(
(0~11, 12 layers in total): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False))
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): BertLayerNorm()
(dropout): Dropout(p=0.1, inplace=False)))
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True))
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): BertLayerNorm()
(dropout): Dropout(p=0.1, inplace=False))))
(pooler): BertPooler(
(dense): Linear(in_features=768, out_features=768, bias=True)
(activation): Tanh()))
(project_bert): Linear(in_features=768, out_features=512, bias=False)
(embedding): MultiLevelEmbedding(
(embs): ModuleList()
(emb_dropouts): ModuleList()
(extra_content_dropout): FeatureDropout()
(layer_norm): LayerNormalization()
(dropout): FeatureDropout()
(timing_dropout): FeatureDropout())
(encoder): Encoder(
(attn_0 * number of attention layers): MultiHeadAttention(
(attention): ScaledDotProductAttention(
(dropout): Dropout(p=0.2, inplace=False)
(softmax): Softmax(dim=-1))
(layer_norm): LayerNormalization()
(proj1): Linear(in_features=256, out_features=512, bias=False)
(proj2): Linear(in_features=256, out_features=512, bias=False)
(residual_dropout): FeatureDropout())
(ff_0): PartitionedPositionwiseFeedForward(
(w_1c): Linear(in_features=512, out_features=1024, bias=True)
(w_1p): Linear(in_features=512, out_features=1024, bias=True)
(w_2c): Linear(in_features=1024, out_features=512, bias=True)
(w_2p): Linear(in_features=1024, out_features=512, bias=True)
(layer_norm): LayerNormalization()
(relu_dropout): FeatureDropout()
(residual_dropout): FeatureDropout()
(relu): ReLU())
(f_label): Sequential(
(0): Linear(in_features=1024, out_features=250, bias=True)
(1): LayerNormalization()
(2): ReLU()
(3): Linear(in_features=250, out_features=277, bias=True)))