-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparsing_VOI.py
141 lines (109 loc) · 4.56 KB
/
parsing_VOI.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#author @t_sanf
import os
import pandas as pd
from collections import Counter
from functools import reduce
from xml.etree.ElementTree import ElementTree
from xml.etree.ElementTree import Element
import xml.etree.ElementTree as etree
class ParseVOI(object):
    '''
    Convert MIPAV text VOI files into per-slice bounding boxes.

    A file is treated as a sequence of rows: every non-blank line after the
    first (header) line, with surrounding whitespace removed. All row numbers
    used by this class are 0-based indices into that sequence.

    NOTE(review): the layout assumed here is a '# slice number' row, two more
    header rows, then one "x y" coordinate row per contour point, with a
    '# unique ID of the VOI' row after the last slice -- confirm against the
    VOI files actually in use.
    '''

    # Tab-separated tokens that identify the structural rows of a VOI file.
    _SLICE_TOKEN = '# slice number'
    _END_TOKEN = '# unique ID of the VOI'
    # Distance from a '# slice number' row to the first coordinate row.
    _FIRST_POINT_OFFSET = 3

    def __init__(self):
        '''
        Create a parser. PATH holds the directory most recently given to
        list_of_dicts and is empty until that method is called.
        '''
        self.PATH = ''

    def list_of_dicts_all_images(self, filepaths):
        '''
        Build one bounding-box dictionary per VOI file.

        :param filepaths --> either a directory (str or path-like), in which
            case every '*.voi' file inside it is parsed in sorted name order,
            or an iterable of VOI file paths, parsed in the order given
        :return: list of dicts as produced by BBox_from_position
        '''
        if isinstance(filepaths, (str, os.PathLike)):
            paths = self._voi_files_in(filepaths)
        else:
            paths = list(filepaths)
        return [self.BBox_from_position(path=one_path) for one_path in paths]

    def list_of_dicts(self, filepath):
        '''
        Parse every '*.voi' file found directly inside a directory.

        The directory is remembered in self.PATH. Files are visited in sorted
        name order so the result does not depend on the order in which the
        operating system lists the directory.

        :param filepath --> directory containing the VOI files
        :return: list of dicts as produced by BBox_from_position, one per file
        '''
        self.PATH = filepath
        return [self.BBox_from_position(path=one_path)
                for one_path in self._voi_files_in(filepath)]

    def BBox_from_position(self, path):
        '''
        Compute the bounding box of the contour points of every slice.

        Coordinates are compared as numbers and then truncated with int().
        Rows containing '#' inside a slice are skipped because they cannot be
        coordinates (presumably headers of additional contours -- TODO
        confirm), so the box spans every point listed for the slice.

        :param path --> path to a single VOI file
        :return: dict {slice: (category, xmin, ymin, xmax, ymax)} where slice
            is the slice number as a string and category is the file name up
            to its first '.'
        :raises ValueError: if a coordinate row cannot be parsed or a slice
            has no coordinate rows
        '''
        rows = self._read_rows(path)
        locations = self._slice_locations(rows, self._stem(path))
        boxes = {}
        for slice_num, (category, start, end) in locations.items():
            xs = []
            ys = []
            # 'end' is the index of the last coordinate row, so it is included.
            for row in rows[start:end + 1]:
                if '#' in row:
                    continue
                parts = row.split()
                try:
                    x_val = float(parts[0])
                    y_val = float(parts[1])
                except (IndexError, ValueError):
                    raise ValueError(
                        'cannot parse coordinate row {!r} in {}'.format(row, path))
                xs.append(x_val)
                ys.append(y_val)
            if not xs:
                raise ValueError(
                    'slice {} in {} has no contour points'.format(slice_num, path))
            boxes[slice_num] = (category,
                                int(min(xs)), int(min(ys)),
                                int(max(xs)), int(max(ys)))
        return boxes

    def get_ROI_slice_loc(self, path=None):
        '''
        Locate the coordinate rows of every slice in a VOI file.

        :param path --> path to a single VOI file
        :return: dict {slice number: (filename, start, end)} where start is
            the row of the first coordinate and end is the row just before
            the next '# slice number' row (or before the
            '# unique ID of the VOI' row for the last slice)
        :raises ValueError: if no path is given
        '''
        if path is None:
            raise ValueError('a path to a VOI file is required')
        return self._slice_locations(self._read_rows(path), self._stem(path))

    @staticmethod
    def _voi_files_in(directory):
        '''Return the sorted full paths of the '*.voi' files in a directory.'''
        directory = os.fspath(directory)
        return [os.path.join(directory, name)
                for name in sorted(os.listdir(directory))
                if name.endswith('.voi')]

    @staticmethod
    def _stem(path):
        '''Return the file name of path up to (not including) its first '.'.'''
        return os.path.basename(os.fspath(path)).split('.')[0]

    @staticmethod
    def _read_rows(path):
        '''Return the stripped, non-blank lines of a file after its header line.'''
        with open(path, encoding='utf-8') as handle:
            stripped = [line.strip() for line in handle]
        rows = [line for line in stripped if line]
        # The first non-blank line is the file header and is not a data row.
        return rows[1:]

    @classmethod
    def _slice_locations(cls, rows, filename):
        '''Map each slice number to (filename, first point row, last point row).'''
        slice_rows = []
        end_rows = []
        for index, row in enumerate(rows):
            tokens = row.split('\t')
            if cls._SLICE_TOKEN in tokens:
                slice_rows.append(index)
            if cls._END_TOKEN in tokens:
                end_rows.append(index)
        if not slice_rows:
            return {}
        # Without a terminating row the last slice runs to the end of the file.
        terminator = end_rows[0] if end_rows else len(rows)
        following = slice_rows[1:] + [terminator]
        locations = {}
        for header_row, next_row in zip(slice_rows, following):
            slice_num = rows[header_row].split('\t')[0]
            locations[slice_num] = (filename,
                                    header_row + cls._FIRST_POINT_OFFSET,
                                    next_row - 1)
        return locations
def intersection(list1, list2):
    '''
    Return the items of list1 that also occur in list2.

    The result keeps list1's order and duplicates. list2 is converted to a
    set once so that each membership check is a hash lookup, which means the
    items involved must be hashable.
    '''
    members = set(list2)
    shared = []
    for item in list1:
        if item in members:
            shared.append(item)
    return shared
if __name__=='__main__':
    # The directory to parse is taken from the command line. The previous
    # hard-coded empty path made os.listdir fail on every run, and the parsed
    # result was discarded.
    import sys
    if len(sys.argv) != 2:
        sys.exit('usage: python parsing_VOI.py <directory containing .voi files>')
    c=ParseVOI()
    for one_dict in c.list_of_dicts(sys.argv[1]):
        print(one_dict)