-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathid_files.py
331 lines (294 loc) · 13.5 KB
/
id_files.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
"""Takes list of scenes and sample of dialogue, uses it to find
all instances of each dialogue entered, and writes the name of
the file the dialogue is found in, plus scene number. If dialogue
found in multiple files, replaces scene number with ???? for
subsequent instances. If dialogue sample not found, writes
'File not found' in place of filename.
Copyright (C) 2019 theflyingzamboni
"""
from collections import OrderedDict
from copy import deepcopy
import fnmatch
import os
from pathlib import PurePath
from config_handler import numerical_sort, write_file_list
import re
# Byte pattern registered under the 'TEXT' key below. Unlike most header
# signatures it carries no '^' anchor, so it can match anywhere in the
# data it is applied to.
END_DLG_FLAG = re.compile(
    b'[\x00-\xff][\x00-\x05]\xff\xa0[\x00-\x26]\x00')

# Maps each recognised file type name to the compiled byte pattern used to
# identify it. Entries starting with '^' only match at the very beginning
# of the data; CLUT, TMD and LMB are unanchored.
# BPE and MRG searching won't work on an unpacked folder as these are never
# base files, and get deleted in the unpacking process.
FILETYPE_DICT = {
    type_name: re.compile(signature)
    for type_name, signature in (
        ('BPE', b'^[\x00-\xff]{4}BPE\x1a'),
        ('CLUT', b'\x12\x00{3}'),
        ('DEFF', b'^DEFF'),
        ('MCQ', b'^MCQ'),
        ('MRG', b'^MRG\x1a'),
        ('PXL', b'^\x11\x00{3}'),
        ('TIM', b'^\x10\x00{3}'),
        ('TMD', b'\x41\x00{3}'),
        ('LMB', b'\x43\x4d\x42'),
    )
}
# TEXT shares the very same compiled object as END_DLG_FLAG.
FILETYPE_DICT['TEXT'] = END_DLG_FLAG
# Names whose presence in a parent path flags that parent as a main file
# (is_main == '1').
_MAIN_FILE_NAMES = ('drgn0.bin', 'drgn1.bin', 'drgn21.bin',
                    'drgn22.bin', 'drgn23.bin', 'drgn24.bin')


def _list_bin_files(root_dir):
    """Return every '*.bin' path below root_dir, ordered by numerical_sort."""
    paths = [os.path.join(folder, name)
             for folder, _subdirs, names in os.walk(root_dir)
             for name in fnmatch.filter(names, '*.bin')]
    paths.sort(key=numerical_sort)
    return paths


def _record_file(disc_entries, file_parts, sect_index):
    """Register a file, and each folder level above it, under its parent.

    disc_entries maps a parent path (starting at the SECT component) to the
    set of (file_num, patch_target) tuples found directly inside it.
    """
    last = len(file_parts) - 1
    # Since the number of folders nested within SECT varies, work backwards
    # through folder levels until SECT is reached. The level directly below
    # SECT has no parent file, so the walk stops before it; a file sitting
    # directly in SECT therefore records nothing.
    for i in range(last, sect_index + 1, -1):
        # Only bottom-level files should be flagged for patching.
        patch_target = '1' if i == last else '0'
        parent_name = file_parts[i - 1].upper().replace('_DIR', '.BIN')
        child_name = file_parts[i].upper().replace('_DIR', '.BIN')
        parent_file = os.path.join(*file_parts[sect_index:i - 1], parent_name)
        parent_file = parent_file.replace('\\', '/')
        # The file number is whatever follows the last underscore of the
        # extension-less name, with any curly braces removed.
        file_num = os.path.splitext(child_name)[0].split('_')[-1]
        file_num = file_num.replace('{', '').replace('}', '')
        disc_entries.setdefault(parent_file, set()).add(
            (file_num, patch_target))


def _index_file(output_dict, disc, file_parts):
    """Add one file to output_dict and return the disc it was filed under.

    disc is the disc folder carried over from the previous file; it is
    replaced whenever a disc folder appears in this file's own path.
    """
    sect_index = None
    for i, part in enumerate(file_parts):
        if re.search('Disc [1234]|All Discs', part, re.IGNORECASE):
            disc = part
            output_dict.setdefault(disc, OrderedDict())
        # Since the MRG files in the SECT folders are the only ones that
        # need to be searched to ID file types, ignore anything that
        # isn't in SECT.
        if re.match('sect$', part, re.IGNORECASE):
            sect_index = i
            break
    if sect_index is not None:
        # NOTE(review): if no disc folder has been seen yet, disc is None
        # and this lookup raises KeyError -- confirm whether such layouts
        # should be supported.
        _record_file(output_dict[disc], file_parts, sect_index)
    return disc


def _finalize(output_dict):
    """Convert the collected sets into sorted output lists.

    Returns an OrderedDict of disc -> OrderedDict of parent file ->
    [is_main, (file_num, patch_target), ...]. Parent files are ordered with
    numerical_sort; children are ordered by numerical_sort of file_num, with
    patch_target as tie-break so the result does not depend on set order.
    """
    result = OrderedDict()
    for disc, entries in output_dict.items():
        ordered = OrderedDict()
        for parent_file in sorted(entries, key=numerical_sort):
            lowered = parent_file.lower()
            is_main = '1' if any(
                name in lowered for name in _MAIN_FILE_NAMES) else '0'
            children = sorted(
                entries[parent_file],
                key=lambda child: (numerical_sort(child[0]), child[1]))
            ordered[parent_file] = [is_main] + children
        result[disc] = ordered
    return result


def _scan(file_list, is_wanted=None):
    """Index every path in file_list accepted by is_wanted.

    is_wanted is an optional callable taking a path; when None every file is
    indexed. Progress is printed to the console while scanning. Returns the
    finalized output dictionary (see _finalize).
    """
    output_dict = OrderedDict()
    disc = None
    total_files = len(file_list)
    update_percent = max(1, total_files // 250)
    print('Searching: 0%', end='\r')
    for files_searched, path in enumerate(file_list, 1):
        if is_wanted is None or is_wanted(path):
            disc = _index_file(output_dict, disc, PurePath(path).parts)
        # Every visited file counts towards progress, including skipped ones.
        if files_searched % update_percent == 0:
            print('Searching: %.1f%%' % round(
                files_searched / total_files * 100, 1), end='\r')
    print('Searching: 100% \n')
    return _finalize(output_dict)


def _emit_file_list(output_dict, output_file):
    """Write output_dict via write_file_list, or print it if output_file is None."""
    if output_file is not None:
        write_file_list(output_file,
                        {'[PATCH]': output_dict, '[SWAP]': {}})
        return
    for disc, entries in output_dict.items():
        print('#' + disc.title())
        for parent_file, listing in entries.items():
            # First element is the is_main flag, the rest are children.
            print(parent_file + '\t' + str(listing[0]))
            for file_num, patch_target in listing[1:]:
                print(file_num + '\t' + patch_target)
            # Blank line terminates each parent file's listing.
            print()


def build_index(dir_to_index, output_file=None):
    """
    Builds a file list of every '*.bin' file located beneath a SECT folder.

    Each file is recorded under its parent file together with a flag marking
    it as a patch target; enclosing folder levels are recorded under their
    own parents up to (not including) the level directly below SECT.

    dir_to_index : str
        Name of root directory to index
    output_file : str
        Name of file list text file to output results to
        (default: None [will print to console])
    """
    if not os.path.isdir(dir_to_index):
        print('%s not found' % dir_to_index)
        return

    _emit_file_list(_scan(_list_bin_files(dir_to_index)), output_file)


def id_file_type(dir_to_search, file_type=None, header_pattern=None,
                 output_file=None):
    """
    Identifies files of given type by header or byte pattern.

    File types can be defined by either file type or byte pattern. Only
    one is required, and file type is given higher priority.
    If the desired file type is not already predefined, a raw byte pattern
    can be used instead. Please consider contacting dev to add file type
    if information is known. This function should mostly be used on files
    unpacked using the unpack_all() function in game_file_handler only, due
    to the removal of non-bottom-level files. Not doing so could result in
    inappropriately set is_patch_target flags on some files.

    dir_to_search : str
        Name of root directory to search
    file_type : str
        Type of file to search for; should be one of the listed known types
        (default: None)
    header_pattern : str
        Hex sequence to search for in header; it is matched as literal bytes
        at the very start of the file. Known sequences should be added
        to type dict (default: None)
    output_file : str
        Name of file list text file to output results to
        (default: None [will print to console])
    """
    if not os.path.isdir(dir_to_search):
        print('%s not found' % dir_to_search)
        return

    try:
        file_type = file_type.upper()  # Raises AttributeError if None
    except AttributeError:
        file_type = None
        if header_pattern is None:
            print('Please provide either a file_type or header_pattern')
            return
        # Escape the raw bytes so values such as 0x28 '(' or 0x2e '.' are
        # matched literally instead of being read as regex syntax.
        pattern = re.compile(
            b'^' + re.escape(bytes.fromhex(header_pattern)))
    else:
        if header_pattern is not None:
            print('Use type or pattern, not both. Defaulting to type.\n')
        if file_type not in FILETYPE_DICT:
            print('File type not recognized. Currently recognized '
                  'file types are %s' % list(FILETYPE_DICT.keys()))
            return
        pattern = FILETYPE_DICT[file_type]

    # TEXT and LMB patterns are searched through the full file. Otherwise,
    # just the first 16 bytes are read to match the header.
    read_whole_file = file_type in ('TEXT', 'LMB')

    def is_wanted(path):
        with open(path, 'rb') as f:
            data = f.read() if read_whole_file else f.read(16)
        if file_type == 'TEXT':
            # Only end-token matches starting at an even offset count.
            return any(m.start(0) % 2 == 0
                       for m in END_DLG_FLAG.finditer(data))
        return pattern.search(data) is not None

    result = _scan(_list_bin_files(dir_to_search), is_wanted)
    # An empty output_file name is treated like None (print to console).
    _emit_file_list(result, output_file or None)