-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathtest.py
82 lines (70 loc) · 2.29 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import json
import os
import random
from glob import glob
from multiprocessing import Pool
from pathlib import Path

import nltk
import numpy as np
import pandas as pd
import spacy
import torch
import transformers
from nltk.tokenize import sent_tokenize
from transformers import pipeline, set_seed
from transformers import (
CONFIG_MAPPING,
MODEL_FOR_CAUSAL_LM_MAPPING,
AutoConfig,
AutoModelForCausalLM,
AutoTokenizer,
HfArgumentParser,
Trainer,
TrainingArguments,
default_data_collator,
set_seed,
BertTokenizer,
GPT2Tokenizer
)
from transformers.trainer_utils import get_last_checkpoint, is_main_process
from transformers import GPT2LMHeadModel, AutoTokenizer, AutoModelForMaskedLM

from utils import *
from eval_metric import *
def seed_everything(seed_value):
    """Seed every RNG this script touches so runs are reproducible.

    Seeds Python's `random`, `PYTHONHASHSEED`, NumPy, torch (CPU and all
    CUDA devices), and the HuggingFace helpers via `set_seed`. Also pins
    cuDNN to deterministic, non-benchmarked kernels per the PyTorch
    reproducibility docs.

    Args:
        seed_value: integer seed applied to all generators.
    """
    random.seed(seed_value)
    os.environ['PYTHONHASHSEED'] = str(seed_value)
    np.random.seed(seed_value)  # as reproducibility docs
    torch.manual_seed(seed_value)  # as reproducibility docs
    torch.cuda.manual_seed(seed_value)
    torch.cuda.manual_seed_all(seed_value)  # all GPUs, not just the current one
    torch.backends.cudnn.benchmark = False  # as reproducibility docs
    torch.backends.cudnn.deterministic = True  # as reproducibility docs
    set_seed(seed_value)  # transformers' own seeding helper


seed = 54
seed_everything(seed)
def _evaluate_generation_file(csv_path,
                              ppl_model_path="/home2/zhanghanqing/pretrained_model/gpt2/large"):
    """Score one generated-result CSV and print its metrics.

    Reads the CSV, builds a {example_id: [first sentence of generation]}
    mapping (everything after the first '.' is dropped), then computes and
    prints concept coverage, self-BLEU, and perplexity.

    Args:
        csv_path: path to a generated_result_*.csv file.
        ppl_model_path: GPT-2 checkpoint used by the perplexity evaluator.

    Returns:
        (coverage, self_bleu, ppls) tuple as produced by the evaluators.
    """
    result = pd.read_csv(csv_path)
    gts = {}
    for row in result.itertuples():
        # row[1] is the first data column (example id); row[-1] is the
        # generated text in the last column.
        text = row[-1].strip()
        text = text.split(".")[0]  # keep only the first sentence
        gts[str(row[1])] = [text]
    coverage = evaluator_coverage(gts)
    self_bleu = evaluator_selfbleu(gts)
    ppls = evaluator_ppl_all(gts, ppl_model_path)
    print(coverage, self_bleu, ppls)
    return coverage, self_bleu, ppls


# Evaluate each checkpoint's generations in turn (same seed, steps 15/18/20).
for _step in (15, 18, 20):
    coverage, self_bleu, ppls = _evaluate_generation_file(
        f"./eval/common_gen/generated_result_{_step}_seed_40.csv"
    )