-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmodule_MET_worker.py
239 lines (200 loc) · 10.1 KB
/
module_MET_worker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
try:
from base import Base
from cursor_FSS import file_open, file_save
except ImportError as error:
print(error)
class MET_Worker(Base):
    """
    Sub-class of Base for the MET base setup

    Methods
    -------
    - build_MET() -> MET_Worker : constructs four variations to the MET that can be used to look up values
    - start() -> tuple : the start method of the MET_Worker module
    """

    def __init__(self, rows:list, cols:list, data:list=None):
        """
        Stores the raw MET table and initialises the four (empty) MET lookups

        Parameters
        ----------
        - rows (list) : row sequences of the MET table, positionally aligned with cols
        - cols (list) : column names of the MET table
        - data (list) : optional; start() reads data[0]['cols'] and data[0]['rows']
        """
        super().__init__('met')
        self.rows = rows
        self.cols = cols
        self.data = data
        self.MET_Simple = {}
        self.MET_SimpleReversed = {}
        self.MET_Logical = {}
        self.MET_Dictionary = {}

    @staticmethod
    def _rows_to_dicts(cols:list, rows:list):
        """
        Combines column names and positional rows into a list of dictionaries

        Parameters
        ----------
        - cols (list) : column names
        - rows (list) : row sequences, positionally aligned with cols

        Returns
        -------
        - list : one dictionary per row, keyed by column name
        """
        # RESOLVE EVERY COLUMN POSITION ONCE INSTEAD OF CALLING cols.index() FOR EVERY CELL;
        # setdefault KEEPS THE FIRST OCCURRENCE OF A DUPLICATED NAME, EXACTLY LIKE list.index() DOES
        first_index = {}
        for position, name in enumerate(cols):
            first_index.setdefault(name, position)
        return [{ name : row[position] for name, position in first_index.items() } for row in rows]

    def build_MET(self):
        """
        Constructs four variations to the MET that can be used to look up values

        Parameters
        ----------

        Returns
        -------
        - self (MET_Worker) : this instance, with MET_Simple, MET_SimpleReversed, MET_Logical and MET_Dictionary populated
        """
        def generate_met_simple():
            """
            Generates and stores a simple MET dictionary for fast lookup
            (CN elements joined with '_' -> lower-cased term)
            """
            if not file_open('compiled', 'met_simple', self.module_type, False):
                self.MET_Simple = { "_".join(x['CN']) : x['Term'].lower() for x in met_compiled }
                file_save('compiled', 'met_simple', self.MET_Simple, self.module_type)
            else:
                self.MET_Simple = file_open('compiled', 'met_simple', self.module_type)

        def generate_met_reversed():
            """
            Generates and stores a simple MET dictionary for fast reverse lookup with keys-values switched.
            A term that belongs to several codes maps to a list of codes instead of a single string.
            """
            if not file_open('compiled', 'met_simple_reversed', self.module_type, False):
                if not self.MET_Simple:
                    generate_met_simple()
                reversed_lookup = self.MET_SimpleReversed
                for code, term in self.MET_Simple.items():
                    if term not in reversed_lookup:
                        reversed_lookup[term] = code
                        continue
                    # SECOND (OR LATER) CODE FOR THE SAME TERM: PROMOTE THE STORED VALUE TO A LIST
                    known = reversed_lookup[term]
                    codes = [known] if isinstance(known, str) else list(known)
                    codes.append(code)
                    reversed_lookup[term] = codes
                self.MET_SimpleReversed = dict(sorted(reversed_lookup.items(), key=lambda item: item[0].lower()))
                file_save('compiled', 'met_simple_reversed', self.MET_SimpleReversed, self.module_type)
            else:
                self.MET_SimpleReversed = file_open('compiled', 'met_simple_reversed', self.module_type)

        def generate_met_logical():
            """
            Generates and stores a logical tree of the MET code path values

            Methods
            -------
            - addLevel(path=list, MET_logical=dict) -> dict : recursive function to add new levels to the MET logical tree
            """
            def addLevel(path:list, MET_logical:dict):
                """
                A recursive private function that adds a new level to the MET_logical tree

                Parameters
                ----------
                - path (list) : list with logical path elements
                - MET_logical (dict) : dictionary with logical values

                Returns
                -------
                - MET_logical (dict) : dictionary with logical values

                Raises
                ------
                - ValueError : when a parent element of the path is not yet present in the tree
                """
                if len(path) == 1:
                    # LEAF OF THIS PATH: (RE)CREATE THE NODE AS AN EMPTY LEVEL
                    MET_logical[path[0]] = {}
                else:
                    # PARENTS MUST ALREADY EXIST; met_compiled IS SORTED ON CN SO THAT PARENTS COME FIRST
                    if path[0] not in MET_logical:
                        raise ValueError(f"{path[0]!r} is not in the MET logical tree")
                    addLevel(path[1:], MET_logical[path[0]])
                return MET_logical

            if not file_open('compiled', 'met_logical', self.module_type, False):
                for term in met_compiled:
                    self.MET_Logical = addLevel(term['CN'], self.MET_Logical)
                file_save('compiled', 'met_logical', self.MET_Logical, self.module_type)
            else:
                self.MET_Logical = file_open('compiled', 'met_logical', self.module_type)

        def generate_met_dictionary():
            """
            Generates and stores a tree of the MET term values

            Methods
            -------
            - addTerm(path=list, term=str, MET_dictionary=dict, MET_logical=dict) -> dict : recursive function to add new levels to the MET dictionary tree
            """
            def addTerm(path:list, term:str, MET_dictionary:dict, MET_logical:dict):
                """
                A recursive private function that adds a new level to the MET_dictionary tree

                Parameters
                ----------
                - path (list) : a list with logical path elements
                - term (str) : term to add
                - MET_dictionary (dict) : the MET_dictionary to add to
                - MET_logical (dict) : the MET_logical with logical paths

                Returns
                -------
                - MET_dictionary (dict) : the MET dictionary
                """
                if len(path) == 1:
                    MET_dictionary[term] = {}
                else:
                    # THE PARENT TERM IS FOUND BY POSITION: THE INDEX OF THE PARENT CODE IN THE LOGICAL
                    # TREE IS USED AS THE INDEX OF THE PARENT TERM IN THE DICTIONARY TREE
                    position = list(MET_logical).index(path[0])
                    labels = list(MET_dictionary)
                    if len(labels) < position:
                        cat = labels[position]
                    else:
                        # NEGATIVE INDEX COUNTED FROM THE END; EQUALS labels[position] WHILE position < len(labels)
                        cat = labels[position - len(labels)]
                    addTerm(path[1:], term, MET_dictionary[cat], MET_logical[path[0]])
                return MET_dictionary

            if not file_open('compiled', 'met_dictionary', self.module_type, False):
                if not self.MET_Logical:
                    generate_met_logical()
                for term in met_compiled:
                    self.MET_Dictionary = addTerm(term['CN'], term['Term'], self.MET_Dictionary, self.MET_Logical)
                file_save('compiled', 'met_dictionary', self.MET_Dictionary, self.module_type)
            else:
                self.MET_Dictionary = file_open('compiled', 'met_dictionary', self.module_type)

        # THE FILE 'met' CONTAINS ALL RELEVANT DATA TO GENERATE THE MET DATA REQUIRED TO UPDATE ALL RECORDS
        # THIS FILE ORIGINATES FROM THE TERMS AND TERMMASTER TABLES IN THE GICARDTMSTHESTEST DATABASE
        # NOTE(review): the loaded value is discarded by the next assignment; the call is kept only
        # because file_open's side effects are not visible from this module - confirm and remove
        met_compiled = file_open('compiled', 'met_compiled', self.module_type)
        # COMBINE ROWS AND COLS TO SINGLE DICTIONARY
        met_compiled = self._rows_to_dicts(self.cols, self.rows)
        met_compiled = [{ k : int(v) if 'NodeDepth' in k else v for k, v in x.items() } for x in met_compiled]
        met_compiled = [{ k : v.split('.') if 'CN' in k else v for k, v in x.items() } for x in met_compiled]
        # THE ORDER OF CN ELEMENTS IS IMPORTANT TO PROPERLY GENERATE THE MET DICTIONARY FILES
        met_compiled = sorted(met_compiled, key=lambda v: v['CN'])
        # STORE THE FILE FOR LATER REUSE
        file_save('compiled', 'met_compiled', met_compiled, self.module_type)
        # AT THIS POINT WE CAN GENERATE FOUR DIFFERENT VERSIONS OF THE MET
        generate_met_simple()
        generate_met_reversed()
        generate_met_logical()
        generate_met_dictionary()
        return self

    def start(self):
        """
        The start method of the MET_Worker module.
        1) Iterates over all rows in self.data[0]
        2) Updates all values with paths based on the MET files generated in build_MET()

        Returns
        -------
        - self.records : returned unchanged by this method
        - self.relations (dict) : MediaMasterID -> list of MET term dictionaries (CN, Term, NodeDepth, Codes, Paths)
        - self.thumbnail_urls : returned unchanged by this method
        - dict : processing results ('MET_worker_res' and 'MET_worker_err', both always 0 here)
        """
        table = self.data[0]
        rows = self._rows_to_dicts(table['cols'], table['rows'])

        def findPath(code:str):
            """
            This method returns a path for a value-lookup in MET.

            Parameters
            ----------
            - code (str) : CN code to analyze

            Returns
            -------
            - codes (list) : a list of cumulative codes, one per level of the path
            - list : a list of terms that belong with the paths
            ##### OR
            - code (str) : the original single code
            - str : the corresponding MET term
            """
            if '.' in code:
                parts = code.split('.')
                # 'A.B.C' -> ['A', 'A_B', 'A_B_C']
                codes = ["_".join(parts[:depth]) for depth in range(1, len(parts) + 1)]
                return codes, [self.MET_Simple[c] for c in codes]
            return code, self.MET_Simple[code]

        # RESOLVE ALL CODES TO COMPLETE PATHS
        for row in rows:
            try:
                if row['MediaMasterID'] not in self.relations:
                    self.relations[row['MediaMasterID']] = []
                codes, paths = findPath(row['CN'])
                met_term = {
                    'CN' : row['CN'],
                    'Term' : row['Term'],
                    'NodeDepth' : row['NodeDepth'],
                    'Codes' : codes,
                    'Paths' : paths
                }
                self.relations[row['MediaMasterID']].append(met_term)
            except Exception:
                # NOTE: MediaMasterID 1521 has a problematic code (AAE.AAC.AAE.AAO), which apparently relates
                # to another definition of MET in TMS. This will therefore throw an error, but this is a minor problem.
                # THE ROW IS SKIPPED ON PURPOSE; KeyboardInterrupt AND SystemExit ARE NO LONGER SWALLOWED
                continue
        return self.records, self.relations, self.thumbnail_urls, { 'MET_worker_res' : 0, 'MET_worker_err' : 0 }