forked from j-luo93/ASLI
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathread_tsv_from_tfevent.py
68 lines (54 loc) · 2.56 KB
/
read_tsv_from_tfevent.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import argparse
import csv
import glob
import os
from pathlib import Path
from tensorflow.python.summary.summary_iterator import summary_iterator
# summary_iterator yields objects of the following form:
#   wall_time: float
#   step: int
#   summary {
#     value {
#       tag: str
#       simple_value: float
#     }
#   }
def get_event_file_paths(directory):
    """Return the paths of all tfevents files found anywhere under ``directory``.

    Only regular files are returned. A sub-directory whose name happens to
    start with ``event`` also matches the glob pattern, but it cannot be read
    as an event file, so it is skipped (files inside it are still found).

    Args:
        directory: Root folder to search (``str`` or path-like).

    Returns:
        A lazy iterator over the matching file paths, each as a ``str``.
    """
    # A recursive glob is used instead of a fixed-depth pattern such as
    # '<dir>/*/*/event*' so that the nesting depth of the run folders does not
    # matter.
    matches = Path(directory).glob('**/event*')
    return (str(match) for match in matches if match.is_file())
def read_event_file(out_folder, path):
    """Export the summary values of one tfevents file to a TSV file.

    The output has a ``step``/``tag``/``value`` header followed by one row per
    summary value read from the event file. What ``value`` represents depends
    on the tag: it could be loss, accuracy, the gradient norm, etc.

    The output file is named after the full input path, with every path
    separator replaced by ``__`` and ``.tsv`` appended, e.g.
    ``log/run/events.out.tfevents.1.host`` becomes
    ``log__run__events.out.tfevents.1.host.tsv``.

    Args:
        out_folder: Folder to write the TSV into; created if it is missing.
        path: Path of the tfevents file to read, as a ``str``.
    """
    header = ['step', 'tag', 'value']

    out_folder = Path(out_folder)
    out_folder.mkdir(parents=True, exist_ok=True)

    # NOTE(j_luo) I'm going with the long file name to preserve more
    # information about where the event file came from.
    # Both the platform separator and '/' are flattened so the name never
    # contains a directory component.
    name = path.replace(os.sep, '__').replace('/', '__')
    # '.tsv' is appended rather than set via with_suffix(): with_suffix() would
    # replace the last dotted component of the tfevents name, so inputs that
    # differ only in that component would overwrite each other's output.
    file_name = out_folder / (name + '.tsv')

    # newline='' lets the csv module control line endings itself, as its
    # documentation requires for files handed to csv.writer.
    with open(file_name, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f, delimiter='\t')
        writer.writerow(header)
        for e in summary_iterator(path):
            # Each event is expected to hold a single value, but iterating is
            # the only known way to extract it.
            # NOTE(review): values not stored as simple_value (e.g. histograms)
            # presumably show up with a default of 0.0 here; confirm whether
            # they should be skipped instead.
            for v in e.summary.value:
                writer.writerow([e.step, v.tag, v.simple_value])
def main(argv=None):
    """Command-line entry point: export every tfevents file under a folder to TSV.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read the arguments from ``sys.argv``.
    """
    import sys  # local import: only needed for the warning on stderr below

    parser = argparse.ArgumentParser(description="export a tfevents file to a tsv")
    parser.add_argument('--data_path', required=True,
                        help='date-level directory to export tfevents files from (eg log/2020-08-18)')
    parser.add_argument('-v', '--verbose', action='store_true', help='increase output verbosity')
    parser.add_argument('--out_folder', type=str, default='processed_log',
                        help='Output folder for the processed files.')
    args = parser.parse_args(argv)

    if args.verbose:
        print("Reading the following tfevent files:")

    # Count the exported files so an empty search result can be reported
    # instead of silently looking like a successful run.
    num_exported = 0
    for path in get_event_file_paths(args.data_path):
        if args.verbose:
            print('\t' + path)
        read_event_file(args.out_folder, path)
        num_exported += 1

    if num_exported == 0:
        # Most likely a mistyped --data_path; the warning goes to stderr and
        # is shown even without --verbose.
        print(f"Warning: no tfevents files found under {args.data_path}.", file=sys.stderr)
    elif args.verbose:
        print(f"Saved .tsv files under {args.out_folder}.")


if __name__ == '__main__':
    main()