-
Notifications
You must be signed in to change notification settings - Fork 34
/
Copy pathififuncs.py
2018 lines (1868 loc) · 75 KB
/
ififuncs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
'''
A collection of functions that other scripts can use.
'''
from __future__ import print_function
import subprocess
import sys
import time
import smtplib
import mimetypes
import getpass
import os
import filecmp
import hashlib
import datetime
import uuid
import tempfile
import csv
import operator
import json
import ctypes
import platform
import itertools
import unicodedata
import shutil
from builtins import input
import makedfxml
from glob import glob
from email.mime.multipart import MIMEMultipart
from email.mime.audio import MIMEAudio
from email.mime.base import MIMEBase
from email.mime.image import MIMEImage
from email.mime.text import MIMEText
try:
from lxml import etree
except ImportError:
print('ERROR - lxml is not installed - try pip install lxml')
sys.exit()
def diff_textfiles(source_textfile, other_textfile):
    '''
    Compares the contents of two textfiles byte for byte.

    Prints a status message and returns 'lossless' when the files are
    identical, or 'lossy' when they differ.
    '''
    identical = filecmp.cmp(source_textfile, other_textfile, shallow=False)
    if not identical:
        print(" - CHECKSUM MISMATCH - Further information on the next line!!!")
        return 'lossy'
    print(" - YOUR FILES ARE LOSSLESS YOU SHOULD BE SO HAPPY!!!")
    return 'lossless'
def make_mediainfo(xmlfilename, xmlvariable, inputfilename):
    '''
    Writes a verbose mediainfo XML output using the OLDXML schema.

    xmlfilename: path of the XML report that gets written.
    xmlvariable: ignored on input (it is overwritten); kept so that
    existing callers do not need to change.
    inputfilename: path handed to mediainfo for analysis.

    Raises whatever subprocess.check_output raises when mediainfo is
    missing or fails. The report is only created once mediainfo has
    succeeded, so a failed run does not leave an empty file behind.
    '''
    mediainfo_cmd = [
        'mediainfo',
        '-f',
        '--language=raw',
        '--File_TestContinuousFileNames=0',
        '--output=OLDXML',
        inputfilename
    ]
    # Run mediainfo once and retry only the decoding step, instead of
    # launching the tool a second time when the first decode fails.
    raw_output = subprocess.check_output(mediainfo_cmd)
    try:
        # sys.stdout.encoding is None when stdout was replaced by an
        # object without an encoding; bytes.decode(None) would raise.
        xmlvariable = raw_output.decode(sys.stdout.encoding or 'utf-8')
    except UnicodeDecodeError:
        # https://stackoverflow.com/a/21486747
        xmlvariable = raw_output.decode('cp1252')
    with open(xmlfilename, "w") as fo:
        fo.write(xmlvariable)
def make_exiftool(xmlfilename, inputfilename):
    '''
    Runs exiftool on inputfilename and stores the JSON report in
    xmlfilename (written as UTF-8).
    '''
    with open(xmlfilename, "w", encoding='utf8') as report:
        try:
            raw_json = subprocess.check_output(['exiftool', '-j', inputfilename])
            json_text = raw_json.decode(sys.stdout.encoding)
        except subprocess.CalledProcessError:
            # exiftool has difficulties with unicode filenames on windows.
            # As a fallback the file is streamed to exiftool over stdin
            # so that exiftool never has to open the path itself.
            # https://exiftool.org/exiftool_pod.html#WINDOWS-UNICODE-FILE-NAMES
            with open(inputfilename, 'rb') as source:
                json_text = subprocess.check_output(
                    ['exiftool', '-j', '-'], stdin=source
                ).decode("utf-8")
        report.write(json_text)
def make_siegfried(xmlfilename, inputfilename):
    '''
    Runs Siegfried (sf) on inputfilename and writes its JSON report,
    re-serialised with sorted keys and a 4 space indent, to xmlfilename.
    '''
    with open(xmlfilename, "w+") as report:
        raw_json = subprocess.check_output(['sf', '-json', inputfilename])
        identification = json.loads(raw_json.decode(sys.stdout.encoding))
        pretty_json = json.dumps(identification, indent=4, sort_keys=True)
        report.write(pretty_json)
def make_mediaconch(full_path, mediaconch_xmlfile):
    '''
    Runs a mediaconch implementation check (-fx) on full_path and saves
    the resulting XML report to mediaconch_xmlfile.
    '''
    print(' - Mediaconch is analyzing %s' % full_path)
    raw_report = subprocess.check_output(['mediaconch', '-fx', full_path])
    report_text = raw_report.decode(sys.stdout.encoding)
    with open(mediaconch_xmlfile, 'w') as report:
        report.write(report_text)
def extract_provenance(filename, output_folder, output_uuid):
    '''
    Extracts dfxml, mediainfo and mediatrace XML for filename.

    The three reports are written into output_folder, each one prefixed
    with output_uuid, and their paths are returned as
    (mediainfo_xml, mediatrace_xml, dfxml).
    Need to add a workaround for TIFF as folders are not
    processed as a whole.
    '''
    report_prefix = "%s/%s_source_" % (output_folder, output_uuid)
    inputxml = report_prefix + 'mediainfo.xml'
    inputtracexml = report_prefix + 'mediatrace.xml'
    dfxml = report_prefix + 'dfxml.xml'
    print(' - Generating mediainfo xml of input file and saving it in %s' % inputxml)
    make_mediainfo(inputxml, 'mediaxmlinput', filename)
    print(' - Generating mediatrace xml of input file and saving it in %s' % inputtracexml)
    make_mediatrace(inputtracexml, 'mediatracexmlinput', filename)
    # DFXML describes a directory, so a single file is swapped for the
    # directory that contains it.
    dfxml_source = filename
    if os.path.isfile(filename):
        dfxml_source = os.path.dirname(filename)
        # A bare filename has no directory component; abspath('') then
        # resolves to the current working directory.
        if dfxml_source == '':
            dfxml_source = os.path.abspath(dfxml_source)
    print(' - Generating Digital Forensics xml of input directory and saving it in %s' % dfxml)
    makedfxml.main([dfxml_source, '-n', '-o', dfxml])
    return inputxml, inputtracexml, dfxml
def generate_mediainfo_xmls(filename, output_folder, output_uuid, log_name_source):
    '''
    Adds the mediainfo, mediatrace and dfxml reports to the package and
    records one metadata extraction event per report in log_name_source.

    Returns the paths of the three reports.
    '''
    inputxml, inputtracexml, dfxml = extract_provenance(filename, output_folder, output_uuid)
    mediainfo_version = get_mediainfo_version()
    event_template = 'EVENT = Metadata extraction - eventDetail=%s, eventOutcome=%s, agentName=%s'
    events = (
        ('Technical metadata extraction via mediainfo', inputxml, mediainfo_version),
        ('Mediatrace technical metadata extraction via mediainfo', inputtracexml, mediainfo_version),
        ('File system metadata extraction using Digital Forensics XML', dfxml, 'makedfxml'),
    )
    for event in events:
        generate_log(log_name_source, event_template % event)
    return inputxml, inputtracexml, dfxml
def make_qctools(input):
    '''
    Runs ffprobe with the QCTools filter chain on the input file and
    returns the XML report as captured from ffprobe's stdout.
    No file is created here.
    '''
    lavfi_graph = "movie=%s:s=v+a[in0][in1],[in0]signalstats=stat=tout+vrep+brng,cropdetect=reset=1:round=1,split[a][b];[a]field=top[a1];[b]field=bottom[b1],[a1][b1]psnr[out0];[in1]ebur128=metadata=1,astats=metadata=1:reset=1:length=0.4[out1]" % input
    ffprobe_cmd = [
        'ffprobe', '-f', 'lavfi', '-i',
        lavfi_graph,
        '-show_frames', '-show_versions', '-of', 'xml=x=1:q=1', '-noprivate'
    ]
    print(ffprobe_cmd)
    return subprocess.check_output(ffprobe_cmd)
def write_qctools_gz(qctoolsxml, sourcefile):
    '''
    Generates a QCTools XML report for sourcefile, writes it to
    qctoolsxml and then gzips that file.

    make_qctools() returns the raw bytes captured from ffprobe, so the
    report is written in binary mode; writing bytes to a text-mode file
    raises TypeError on Python 3.
    '''
    report = make_qctools(sourcefile)
    if isinstance(report, str):
        # Tolerate a report that has already been decoded.
        report = report.encode('utf-8')
    with open(qctoolsxml, "wb") as fo:
        fo.write(report)
    # The file is closed before gzip is launched so that everything has
    # been flushed to disk.
    subprocess.call(['gzip', qctoolsxml])
def get_audio_stream_count(filename=None):
    '''
    Returns the number of audio streams in the form of an INT.

    filename: file to probe with ffprobe. When omitted, the first
    command line argument (sys.argv[1]) is used, which was the only
    behaviour before this parameter existed.
    '''
    if filename is None:
        filename = sys.argv[1]
    ffprobe_cmd = [
        'ffprobe', '-v',
        'error', '-select_streams', 'a',
        '-show_entries', 'stream=index', '-of', 'flat',
        filename
    ]
    # ffprobe prints one line per audio stream.
    audio_streams = subprocess.check_output(ffprobe_cmd).splitlines()
    return len(audio_streams)
def get_mediainfo(var_type, type, filename):
    '''
    Uses mediainfo to extract a single item of metadata and returns it
    as a string with all line breaks removed.
    The incoming value of var_type is not used.
    example:
    duration = get_mediainfo(
    'duration', '--inform=General;%Duration_String4%', sys.argv[1]
    )
    '''
    raw_value = subprocess.check_output(
        ['mediainfo', '--Language=raw', '--Full', type, filename]
    )
    value = raw_value.decode(sys.stdout.encoding)
    for line_break in ('\n', '\r'):
        value = value.replace(line_break, '')
    return value
def get_milliseconds(filename):
    '''
    Asks mediainfo for the General Duration of filename and returns it
    as a float (milliseconds).
    '''
    duration = get_mediainfo('miliseconds', '--inform=General;%Duration%', filename)
    return float(duration)
def convert_millis(milli):
    '''
    Accepts milliseconds and returns this value as HH:MM:SS.NNN

    The value is built arithmetically rather than by parsing
    str(timedelta), which failed with ValueError for any duration of
    ten hours or more. Hours are not wrapped at 24, so 25 hours is
    returned as '25:00:00.000'. Fractions of a millisecond are dropped.

    Raises ValueError for negative durations.
    '''
    duration = datetime.timedelta(milliseconds=milli)
    if duration < datetime.timedelta(0):
        raise ValueError('duration must not be negative: %r' % (milli,))
    whole_seconds = duration.days * 86400 + duration.seconds
    hours, remainder = divmod(whole_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    # timedelta stores microseconds; truncate them to milliseconds.
    millis = duration.microseconds // 1000
    return '%02d:%02d:%02d.%03d' % (hours, minutes, seconds, millis)
def send_gmail(email_to, attachment, subject, email_body, email_address, password):
    '''
    Rarely used but working emailer.

    Sends email_body plus one attachment through smtp.gmail.com (SSL,
    port 465), logging in with email_address and password.

    email_to: a list of recipient addresses, or a single address string.
    attachment: path of the file to attach. Its MIME type is guessed
    from the filename and falls back to application/octet-stream.
    '''
    # Imported here so that this function does not rely on a change to
    # the module-level imports.
    from email import encoders
    emailfrom = ""
    # A plain string would otherwise be joined character by character
    # when the To header is built.
    recipients = [email_to] if isinstance(email_to, str) else list(email_to)
    msg = MIMEMultipart()
    msg["From"] = emailfrom
    msg["To"] = ", ".join(recipients)
    msg["Subject"] = subject
    msg.preamble = "testtesttest"
    msg.attach(MIMEText(email_body))
    ctype, encoding = mimetypes.guess_type(attachment)
    if ctype is None or encoding is not None:
        ctype = "application/octet-stream"
    maintype, subtype = ctype.split("/", 1)
    if maintype == "text":
        with open(attachment) as fp:
            # Note: we should handle calculating the charset
            part = MIMEText(fp.read(), _subtype=subtype)
    else:
        with open(attachment, "rb") as fp:
            payload = fp.read()
        if maintype == "image":
            part = MIMEImage(payload, _subtype=subtype)
        elif maintype == "audio":
            part = MIMEAudio(payload, _subtype=subtype)
        else:
            part = MIMEBase(maintype, subtype)
            part.set_payload(payload)
            # A raw bytes payload cannot be serialised by as_string();
            # it has to be transfer-encoded first.
            encoders.encode_base64(part)
    part.add_header("Content-Disposition", "attachment", filename=attachment)
    msg.attach(part)
    server_ssl = smtplib.SMTP_SSL("smtp.gmail.com", 465)
    try:
        server_ssl.ehlo() # optional, called by login()
        server_ssl.login(email_address, password)
        # ssl server doesn't support or need tls, so don't call server_ssl.starttls()
        server_ssl.sendmail(emailfrom, recipients, msg.as_string())
        print(msg.as_string())
    finally:
        # Release the connection even when login or sending fails.
        server_ssl.close()
    print(' - successfully sent the mail')
def frames_to_seconds(audio_entry_point, edit_rate=24.000):
    '''
    Converts a frame count into seconds, rounded to three decimals.

    audio_entry_point: number of frames (a number or a numeric string).
    edit_rate: frames per second. Defaults to 24, the value that was
    previously hard-coded.
    '''
    seconds = float(audio_entry_point) / float(edit_rate)
    return round(seconds, 3)
def set_environment(logfile):
    '''
    Returns a copy of the current environment with FFREPORT set, so
    that ffmpeg writes its report to logfile at level 48.
    os.environ itself is not modified.
    '''
    # https://github.com/imdn/scripts/blob/0dd89a002d38d1ff6c938d6f70764e6dd8815fdd/ffmpy.py#L272
    ffreport_value = 'file={}:level=48'.format(logfile)
    return dict(os.environ, FFREPORT=ffreport_value)
def generate_log(log, what2log):
    '''
    Writes one line to the logfile: timestamp, current username and
    the what2log message.

    The log is created when it does not exist yet, otherwise the line
    is appended.
    '''
    mode = "a" if os.path.isfile(log) else "w"
    with open(log, mode, encoding='utf-8') as fo:
        entry = ''.join([
            time.strftime("%Y-%m-%dT%H:%M:%S "),
            getpass.getuser(),
            ' ',
            what2log,
            ' \n',
        ])
        fo.write(entry)
def hashlib_md5(filename):
'''
uses hashlib to return an MD5 checksum of an input filename
'''
read_size = 0
last_percent_done = 0
m = hashlib.md5()
total_size = os.path.getsize(filename)
with open(str(filename), 'rb') as f:
while True:
buf = f.read(2**20)
if not buf:
break
read_size += len(buf)
m.update(buf)
percent_done = 100 * read_size / total_size
if percent_done > last_percent_done:
sys.stdout.write('[%d%%]\r' % percent_done)
sys.stdout.flush()
last_percent_done = percent_done
md5_output = m.hexdigest()
return md5_output
def hashlib_sha512(filename):
    '''
    Returns the SHA512 checksum (hex string) of filename.

    Note, this should eventually be merged with the hashlib_md5 function.
    The file is read in 1 MiB chunks and a percentage is written to
    stdout as the read progresses.
    '''
    hasher = hashlib.sha512()
    size_on_disk = os.path.getsize(filename)
    consumed = 0
    reported_percent = 0
    with open(str(filename), 'rb') as source:
        block = source.read(2**20)
        while block:
            consumed += len(block)
            hasher.update(block)
            current_percent = 100 * consumed / size_on_disk
            if current_percent > reported_percent:
                sys.stdout.write('[%d%%]\r' % current_percent)
                sys.stdout.flush()
                reported_percent = current_percent
            block = source.read(2**20)
    return hasher.hexdigest()
def hashlib_manifest(manifest_dir, manifest_textfile, path_to_remove):
    '''
    Creates an MD5 manifest with relative filepaths.

    manifest_dir: directory that is walked recursively. Files and
    folders whose name starts with a dot are skipped.
    manifest_textfile: manifest that is (over)written, as UTF-8.
    path_to_remove: text removed from each absolute directory path so
    that the manifest holds relative paths with forward slashes.

    Each line is the checksum, two spaces, then the path. That is the
    layout that the 34 character offset used when manifests are sorted
    in this file relies on. Lines are sorted by path.
    '''
    # First pass: count the visible files so progress can be reported.
    file_count = 0
    for root, directories, filenames in os.walk(manifest_dir):
        # Pruning in place stops os.walk descending into hidden folders.
        directories[:] = [d for d in directories if d[0] != '.']
        for _ in [f for f in filenames if f[0] != '.']:
            print("- Calculating number of files to process in current directory - {0} files ".format(file_count), end="\r")
            file_count += 1
    # Second pass: hash every visible file.
    entries = []
    md5_counter = 1
    for root, directories, filenames in os.walk(manifest_dir):
        directories[:] = [d for d in directories if d[0] != '.']
        root2 = os.path.abspath(root).replace(path_to_remove, '')
        # Drop one leading separator of each kind. Slicing is safe on an
        # empty string, so no exception needs to be swallowed here.
        if root2[:1] == '/':
            root2 = root2[1:]
        if root2[:1] == '\\':
            root2 = root2[1:]
        for files in [f for f in filenames if f[0] != '.']:
            full_path = os.path.join(root, files)
            print(' - Generating MD5 for %s - file %d of %d' % (full_path, md5_counter, file_count))
            md5 = hashlib_md5(full_path)
            md5_counter += 1
            relative_path = os.path.join(root2, files).replace("\\", "/")
            entries.append((relative_path, md5[:32]))
    # http://stackoverflow.com/a/31306961/2188572
    entries.sort(key=lambda entry: entry[0])
    with open(manifest_textfile, "w", encoding='utf-8') as fo:
        for relative_path, md5 in entries:
            fo.write(unicodedata.normalize('NFC', md5 + '  ' + relative_path) + '\n')
def sha512_manifest(manifest_dir, manifest_textfile, path_to_remove):
    '''
    Creates a sha512 manifest with relative filepaths.
    Note: This should be merged with hashlib_manifest()

    manifest_dir: directory that is walked recursively. Files and
    folders whose name starts with a dot are skipped.
    manifest_textfile: manifest that is (over)written, as UTF-8.
    path_to_remove: text removed from each absolute directory path so
    that the manifest holds relative paths with forward slashes.

    Each line is the 128 character checksum, two spaces, then the
    path, so the path starts at offset 130. Lines are sorted by path.
    '''
    # First pass: count the visible files so progress can be reported.
    file_count = 0
    for root, directories, filenames in os.walk(manifest_dir):
        # Pruning in place stops os.walk descending into hidden folders.
        directories[:] = [d for d in directories if d[0] != '.']
        for _ in [f for f in filenames if f[0] != '.']:
            # end='' keeps the cursor on the same line; the message
            # already ends with a carriage return.
            print(" - Calculating number of files to process in current directory - %s files \r" % file_count, end='')
            file_count += 1
    # Second pass: hash every visible file.
    entries = []
    sha512_counter = 1
    for root, directories, filenames in os.walk(manifest_dir):
        directories[:] = [d for d in directories if d[0] != '.']
        root2 = os.path.abspath(root).replace(path_to_remove, '')
        # Drop one leading separator of each kind. Slicing is safe on an
        # empty string, so no exception needs to be swallowed here.
        if root2[:1] == '/':
            root2 = root2[1:]
        if root2[:1] == '\\':
            root2 = root2[1:]
        for files in [f for f in filenames if f[0] != '.']:
            full_path = os.path.join(root, files)
            print(' - Generating SHA512 for %s - file %d of %d' % (full_path, sha512_counter, file_count))
            sha512 = hashlib_sha512(full_path)
            sha512_counter += 1
            relative_path = os.path.join(root2, files).replace("\\", "/")
            entries.append((relative_path, sha512[:128]))
    # http://stackoverflow.com/a/31306961/2188572
    entries.sort(key=lambda entry: entry[0])
    with open(manifest_textfile, "w", encoding='utf-8') as fo:
        for relative_path, sha512 in entries:
            fo.write(unicodedata.normalize('NFC', sha512 + '  ' + relative_path) + '\n')
def hashlib_append(manifest_dir, manifest_textfile, path_to_remove):
    '''
    Lazy rehash of hashlib_manifest, except this just adds files to an
    existing manifest.

    manifest_dir: directory that is walked recursively. Files and
    folders whose name starts with a dot are skipped.
    manifest_textfile: manifest that the new lines are appended to.
    path_to_remove: text removed from each absolute directory path so
    that the manifest holds relative paths with forward slashes.

    Each appended line is the MD5 checksum, two spaces, then the path.
    The new lines are sorted by path among themselves; lines already in
    the manifest are left untouched.
    '''
    # First pass: count the visible files so progress can be reported.
    file_count = 0
    for root, directories, filenames in os.walk(manifest_dir):
        # Pruning in place stops os.walk descending into hidden folders.
        directories[:] = [d for d in directories if d[0] != '.']
        for _ in [f for f in filenames if f[0] != '.']:
            # end='' keeps the cursor on the same line; the message
            # already ends with a carriage return.
            print(" - Calculating number of files to process in current directory - %s files \r" % file_count, end='')
            file_count += 1
    # Second pass: hash every visible file.
    entries = []
    md5_counter = 1
    for root, directories, filenames in os.walk(manifest_dir):
        directories[:] = [d for d in directories if d[0] != '.']
        root2 = os.path.abspath(root).replace(path_to_remove, '')
        # Drop one leading separator of each kind. Slicing is safe on an
        # empty string, so no exception needs to be swallowed here.
        if root2[:1] == '/':
            root2 = root2[1:]
        if root2[:1] == '\\':
            root2 = root2[1:]
        for files in [f for f in filenames if f[0] != '.']:
            full_path = os.path.join(root, files)
            print(' - Generating MD5 for %s - file %d of %d' % (full_path, md5_counter, file_count))
            md5 = hashlib_md5(full_path)
            md5_counter += 1
            relative_path = os.path.join(root2, files).replace("\\", "/")
            entries.append((relative_path, md5[:32]))
    # http://stackoverflow.com/a/31306961/2188572
    entries.sort(key=lambda entry: entry[0])
    with open(manifest_textfile, "a", encoding='utf-8') as fo:
        for relative_path, md5 in entries:
            fo.write(unicodedata.normalize('NFC', md5 + '  ' + relative_path) + '\n')
def make_manifest(manifest_dir, relative_manifest_path, manifest_textfile):
    '''
    Creates an MD5 manifest with md5deep and returns the number of
    entries that were written.

    The working directory is changed to manifest_dir, md5deep -ler is
    run on relative_manifest_path and the lines, sorted by path, are
    written to manifest_textfile as UTF-8.

    If manifest_textfile already exists a message is printed and the
    program exits via sys.exit().
    '''
    os.chdir(manifest_dir)
    if os.path.isfile(manifest_textfile):
        print(' - Manifest already exists')
        sys.exit()
    md5deep_output = subprocess.check_output(['md5deep', '-ler', relative_manifest_path])
    # check_output returns bytes; unicodedata.normalize() and the text
    # mode file below both need str.
    manifest_list = md5deep_output.decode(sys.stdout.encoding or 'utf-8').splitlines()
    files_in_manifest = len(manifest_list)
    # http://stackoverflow.com/a/31306961/2188572
    # The path starts after the 32 character checksum and two spaces.
    manifest_list.sort(key=lambda x: x[34:])
    with open(manifest_textfile, "w", encoding='utf-8') as fo:
        for i in manifest_list:
            fo.write(unicodedata.normalize('NFC', i) + '\n')
    return files_in_manifest
def make_mediatrace(tracefilename, xmlvariable, inputfilename):
    '''
    Writes a mediatrace (mediainfo --Details=1) XML report.

    tracefilename: path of the XML report that gets written.
    xmlvariable: ignored on input (it is overwritten); kept so that
    existing callers do not need to change.
    inputfilename: path handed to mediainfo for analysis.

    When the output cannot be decoded with either the stdout encoding
    or cp1252, the text 'None' is written instead. The report is only
    created once mediainfo has succeeded.
    '''
    mediatrace_cmd = [
        'mediainfo',
        '-f',
        '--Details=1', '--File_TestContinuousFileNames=0', # Use verbose output.
        '--output=XML',
        inputfilename
    ]
    # Run mediainfo once and only retry the decoding step, instead of
    # launching the tool again for every failed decode.
    raw_output = subprocess.check_output(mediatrace_cmd)
    # Some N19/STL files seem to produce characters which are
    # incompatible with utf-8 and cp1252; 'None' is the fallback then.
    xmlvariable = 'None'
    # https://stackoverflow.com/a/21486747
    # sys.stdout.encoding can be None when stdout has been replaced.
    for codec in (sys.stdout.encoding or 'utf-8', 'cp1252'):
        try:
            xmlvariable = raw_output.decode(codec)
            break
        except UnicodeDecodeError:
            continue
    with open(tracefilename, "w") as fo:
        fo.write(xmlvariable)
def check_overwrite(file2check):
    '''
    Asks whether an existing manifest should be overwritten.

    Returns the answer exactly as typed ('Y', 'y', 'N' or 'n'), asking
    again until one of those is entered. Returns None without asking
    when file2check is not an existing file.
    '''
    if not os.path.isfile(file2check):
        return None
    print(' - A manifest already exists at your destination. Overwrite? Y/N?')
    accepted = ('Y', 'y', 'N', 'n')
    while True:
        reply = input()
        if reply in accepted:
            return reply
        print(' - Incorrect input. Please enter Y or N')
def manifest_file_count(manifest2check):
    '''
    Checks how many entries (lines) are in a manifest.

    Returns 0 when manifest2check is not an existing file; previously
    that case failed because the counter was never assigned.
    '''
    if not os.path.isfile(manifest2check):
        return 0
    print(' - A manifest already exists')
    # Manifests are written as UTF-8 elsewhere in this file. Undecodable
    # bytes are replaced because only the number of lines matters here.
    with open(manifest2check, "r", encoding='utf-8', errors='replace') as fo:
        return sum(1 for _ in fo)
def create_csv(csv_file, *args):
    '''
    Creates (or overwrites) csv_file and writes a single row to it.
    The row is expected as one positional argument, e.g.
    create_csv('out.csv', ['heading one', 'heading two'])
    '''
    with open(csv_file, 'w', newline='') as handle:
        csv.writer(handle).writerow(*args)
def append_csv(csv_file, *args):
    '''
    Appends a single row to csv_file, creating the file if needed.
    The row is expected as one positional argument, e.g.
    append_csv('out.csv', ['value one', 'value two'])
    '''
    with open(csv_file, 'a', newline='') as handle:
        row_writer = csv.writer(handle)
        row_writer.writerow(*args)
def sort_csv(csv_file, key):
    '''
    Sorts a csv_file by a key. The key being a field heading.

    The sorted rows are written to a new file next to csv_file, named
    after it with '_sorted.csv' appended to the stem. The path of that
    new file is returned; csv_file itself is not modified.
    '''
    new_filename = os.path.splitext(os.path.basename(csv_file))[0] + '_sorted.csv'
    sorted_filepath = os.path.join(os.path.dirname(csv_file), new_filename)
    values, fieldnames = extract_metadata(csv_file)
    sorted_rows = sorted(values, key=operator.itemgetter(key))
    # newline='' lets the csv module control line endings; without it
    # every row is followed by a blank line on Windows. This matches
    # how create_csv and append_csv open their files.
    with open(sorted_filepath, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(sorted_rows)
    return sorted_filepath
def make_desktop_manifest_dir():
    '''
    Makes sure that ~/Desktop/moveit_manifests and its old_manifests
    subfolder exist, then returns the path of moveit_manifests.
    '''
    manifests_root = os.path.expanduser("~/Desktop/moveit_manifests")
    archive_dir = os.path.join(manifests_root, 'old_manifests')
    if os.path.isdir(manifests_root):
        if not os.path.isdir(archive_dir):
            os.makedirs(archive_dir)
    else:
        #I should probably ask permission here, or ask for alternative location
        os.makedirs(manifests_root)
        os.makedirs(archive_dir)
    return manifests_root
def make_desktop_logs_dir():
    '''
    Makes sure that ~/Desktop/ifiscripts_logs exists and returns its path.
    '''
    logs_dir = os.path.expanduser("~/Desktop/ifiscripts_logs")
    if os.path.isdir(logs_dir):
        return logs_dir
    #I should probably ask permission here, or ask for alternative location
    os.makedirs(logs_dir)
    return logs_dir
def get_image_sequence_files(directory):
    '''
    Changes the working directory to directory and returns a sorted
    list of the image sequence filenames found there.

    Only one extension is returned, in this order of preference:
    .dpx, .tiff, .tif. The string 'none' is returned when the directory
    holds none of these.
    '''
    os.chdir(directory)
    for pattern in ('*.dpx', '*.tiff', '*.tif'):
        images = glob(pattern)
        if len(images) > 0:
            return sorted(images)
    return 'none'
def check_multi_reel(directory):
    '''
    Accepts a directory as input and returns a sorted list of its
    immediate subdirectories that contain an image sequence.

    get_image_sequence_files() changes the working directory, so the
    starting directory is restored after every check. Without that, a
    relative directory argument stops resolving once the first
    subdirectory has been inspected.
    '''
    image_sequences = []
    starting_dir = os.getcwd()
    for dirs in os.listdir(directory):
        full_path = os.path.join(directory, dirs)
        if not os.path.isdir(full_path):
            continue
        try:
            # Compare by value: 'is not' on a string literal tests
            # object identity and only works by accident.
            has_sequence = get_image_sequence_files(full_path) != 'none'
        finally:
            os.chdir(starting_dir)
        if has_sequence:
            image_sequences.append(full_path)
    return sorted(image_sequences)
def get_ffmpeg_friendly_name(images):
    '''
    Parses image sequence filenames so that they are easily passed to ffmpeg.

    Only images[0] is inspected. Two naming layouts are handled:
    name_000001.dpx (number after the last underscore) and
    name.000001.dpx (number between two dots).

    Returns a tuple of:
    ffmpeg_friendly_name - the filename up to and including the
    separator that precedes the frame number, e.g. 'name_' or 'name.'
    container - the file extension without the dot
    start_number - the frame number as a string, or '864000' when that
    value appears anywhere in the filename.
    '''
    first_image = images[0]
    # Everything after the last underscore, split on dots. More than
    # two parts means the frame number is delimited by dots.
    tail_parts = first_image.split("_")[-1].split(".")
    dot_delimited = len(tail_parts) > 2
    if '864000' in first_image:
        start_number = '864000'
    elif dot_delimited:
        start_number = tail_parts[1]
    else:
        start_number = tail_parts[0]
    container = first_image.split(".")[-1]
    if dot_delimited:
        # Drop the extension and the frame number.
        name_parts = first_image.split(".")[:-2]
        separator = '.'
    else:
        # Drop the 'number.extension' segment.
        name_parts = first_image.split("_")[:-1]
        separator = '_'
    ffmpeg_friendly_name = ''.join(part + separator for part in name_parts)
    return ffmpeg_friendly_name, container, start_number
def parse_image_sequence(images):
    '''
    Parses image sequence filenames so that they are easily passed to ffmpeg.

    Only images[0] is inspected. Returns a tuple of:
    ffmpeg_friendly_name - printf style pattern, e.g. 'name_%06d.dpx'
    start_number - the frame number of the first image as a string
    root_filename - the filename without frame number and extension
    fps - FrameRate as reported by mediainfo (a string), or the
    integer 24 when mediainfo reports nothing.
    '''
    first_image = images[0]
    tail_parts = first_image.split("_")[-1].split(".")
    dot_delimited = len(tail_parts) > 2
    if '864000' in first_image:
        start_number = '864000'
    elif dot_delimited:
        start_number = tail_parts[1]
    else:
        start_number = tail_parts[0]
    container = first_image.split(".")[-1]
    if dot_delimited:
        pieces = first_image.split(".")[0:-1]
        stem = ''.join(piece + '.' for piece in pieces[:-1])
        # Make sure that the stem ends with a dot rather than an underscore.
        stem = stem[:-1] + stem[-1].replace('_', '.')
    else:
        pieces = first_image.split("_")[0:-1]
        stem = ''.join(piece + '_' for piece in pieces)
    # remove trailing separator
    root_filename = stem[:-1]
    number_pattern = "%0" + str(len(start_number)) + 'd.'
    ffmpeg_friendly_name = stem + number_pattern + '%s' % container
    fps = get_mediainfo('duration', '--inform=Image;%FrameRate%', first_image)
    if fps == '':
        fps = 24
    return ffmpeg_friendly_name, start_number, root_filename, fps
def get_date_modified(filename):
    """Returns the modification time of filename in ISO8601 style.
    Date created values seem to be difficult to grab in a cross-platform way.
    Args:
    filename: Path of filename to check.
    Returns:
    date_modified: string in local time, for example '2016-12-19T21:30:43'
    """
    modified = datetime.datetime.fromtimestamp(os.path.getmtime(filename))
    return modified.strftime("%Y-%m-%dT%H:%M:%S")
def create_uuid():
    '''
    Returns a randomly generated (version 4) UUID as a string
    '''
    return str(uuid.uuid4())
def make_folder_structure(path):
    '''
    Makes metadata, objects and logs directories in the supplied path.
    Missing parent directories are created as well; an error is raised
    if one of the three directories already exists.
    UNITTEST - do paths exist
    '''
    for subfolder in ('metadata', 'objects', 'logs'):
        os.makedirs("%s/%s" % (path, subfolder))
def get_user():
    '''
    Asks the user to pick their name from a numbered list, repeating
    the question until a number from 1 to 9 is entered.
    Returns the chosen name as a string, after a one second pause.
    '''
    staff = {
        '1': 'Aoife Fitzmaurice',
        '2': 'Gavin Martin',
        '3': 'Kieran O\'Leary',
        '4': 'Raelene Casey',
        '5': 'Noelia Romero',
        '6': 'Ana Truchanova',
        '7': 'Eoin O\'Donohoe',
        '8': 'Yujing Huang',
        '9': 'Colm Connolly',
    }
    heading = '\n\n**** Who are you?\nPress 1,2,3,4,5,6,7,8,9\n'
    roster = '1. Aoife Fitzmaurice\n2. Gavin Martin\n3. Kieran O\'Leary\n4. Raelene Casey\n5. Noelia Romero\n6. Ana Truchanova\n7. Eoin O\'Donohoe\n8. Yujing Huang\n9. Colm Connolly\n'
    # The first prompt has a blank line before the roster, repeats do not.
    choice = input(heading + '\n' + roster)
    while choice not in staff:
        choice = input(heading + roster)
    time.sleep(1)
    return staff[choice]
def determine_user(args):
    '''
    Works out who the user is: args.user is returned when it is set,
    otherwise the user is asked via get_user().
    UNITTEST - check if user is a string. Use mock to simulate input.
    '''
    if not args.user:
        return get_user()
    return args.user
def get_acquisition_type(acquisition_type):
    '''
    Resolves the type of acquisition.

    acquisition_type: one of '1', '2', '4', '5', '7', '8', '13', '14'.
    Any other value makes the function ask the user until one of
    those codes is entered.

    Returns a list of [description, category, code] after a one
    second pause.
    '''
    valid_codes = ('1', '2', '4', '5', '7', '8', '13', '14')
    prompt = '\n\n**** What is the type of acquisition? - This will not affect Reproductions that have been auto-detected.\nPress 1,2,4,5,7,8,13,14\n\n1. IFB - deposited in compliance with IFB delivery requirements\n2. BAI - deposited in compliance with BAI delivery requirements\n4. Deposit\n5. Purchased for collection\n7. Unknown at present\n8. Arts Council- deposited in compliance with Arts council delivery requirements\n13. Reproduction\n14. Donation\n'
    while acquisition_type not in valid_codes:
        acquisition_type = input(prompt)
    acquisitions = {
        '1': ['1. IFB - deposited in compliance with IFB delivery requirements', 'Deposit', '1'],
        '2': ['2. BAI - deposited in compliance with BAI delivery requirements', 'Deposit', '2'],
        '4': ['4. Deposit', 'Deposit', '4'],
        '5': ['5. Purchased for collection', 'Purchase', '5'],
        '7': ['7. Unknown at present', 'Unknown', '7'],
        '8': ['Arts Council- deposited in compliance with Arts council delivery requirements', 'Deposit', '8'],
        '13': ['Reproduction', 'Reproduction', '13'],
        '14': ['Donation', 'Donation', '14'],
    }
    time.sleep(1)
    return acquisitions[acquisition_type]
def sort_manifest(manifest_textfile):
    '''
    Rewrites an md5 manifest in place with its lines ordered by path,
    i.e. by everything from the 35th character of each line onwards.
    Some scripts like moveit.py will require a manifest to be ordered like this.
    '''
    with open(manifest_textfile, "r", encoding='utf-8') as unsorted_manifest:
        entries = unsorted_manifest.readlines()
    entries.sort(key=lambda entry: entry[34:])
    with open(manifest_textfile, "w", encoding='utf-8') as sorted_manifest:
        sorted_manifest.writelines(entries)
def concat_textfile(video_files, concat_file):
    '''
    Appends one ffmpeg concat entry (file 'path') per item in
    video_files to concat_file.
    a condition is needed elsewhere to ensure concat_file is empty
    '''
    entries = ['file \'%s\'\n' % video for video in video_files]
    # Nothing is opened (or created) when there is nothing to write.
    if entries:
        with open(concat_file, 'a') as textfile:
            textfile.writelines(entries)
def sanitise_filenames(video_files):
    '''
    Replaces single quotes in filenames with underscores, renaming the
    files on disk. Only used right now to make concat scripts work.

    The user is asked Y/N the first time a quote is found; that answer
    is then reused for every later file. With a yes the file is renamed
    and the new name is returned in its place. With a no, files that
    contain a quote are left out of the returned list.
    Files without quotes are returned unchanged.
    previous and current filename should be logged.
    Also there should be a better way of returning the list.
    '''
    accepted = ('Y', 'y', 'N', 'n')
    reply = ''
    checked_files = []
    for video in video_files:
        if '\'' not in video:
            checked_files.append(video)
            continue
        print(' - A quote is in your filename %s , replace with underscore?' % video)
        while reply not in accepted:
            reply = input()
            if reply not in accepted:
                print(' - Incorrect input. Please enter Y or N')
        if reply in ('Y', 'y'):
            cleaned = video.replace('\'', '_')
            os.rename(video, cleaned)
            checked_files.append(cleaned)
    return checked_files
def get_temp_concat(root_name):
    '''
    Generates the path of a temporary textfile for ffmpeg concatenation.

    The file itself is not created. The path lives in the system temp
    directory and is named after the basename of root_name, followed by
    '_video_concat' and the current date and time.
    '''
    temp_dir = tempfile.gettempdir()
    video_concat_filename = os.path.basename(
        root_name) + '_video_concat' + time.strftime("_%Y_%m_%dT%H_%M_%S")
    # Slashes are significant for ffmpeg concat files.
    if sys.platform == "win32":
        # The backslash is escaped explicitly: "\%" is an invalid escape
        # sequence that newer Python versions warn about.
        video_concat_textfile = temp_dir + "\\%s.txt" % video_concat_filename
    else:
        video_concat_textfile = temp_dir + "/%s.txt" % video_concat_filename
    return video_concat_textfile
def get_script_version(scriptname):
    '''
    uses git to get SHA:DATETIME for a script

    The lookup runs inside $HOME/ifigit/ifiscripts. When that folder
    exists the raw output of git log is returned (bytes); otherwise an
    explanatory message is returned (str).

    The working directory is always restored, also when git is missing
    or fails, in which case the exception from subprocess propagates.
    '''
    current_dir = os.getcwd()
    home = os.path.expanduser("~/")
    try:
        os.chdir(home)
        if os.path.isdir('ifigit/ifiscripts'):
            os.chdir('ifigit/ifiscripts')
            print("Changing directory to %s to extract script version`" % os.getcwd())
            script_version = subprocess.check_output([
                'git', 'log', '-n', '1', '--pretty=format:%H:%aI', scriptname
            ])
        else:
            script_version = 'Script version unavailable as the ifiscripts repository is not installed in $HOME/ifigit/ifiscripts'
    finally:
        # Do not leave the caller stranded in another directory.
        os.chdir(current_dir)
    return script_version
def validate_uuid4(uuid_string):
    """
    Checks that uuid_string can be parsed as a UUID.

    Returns False when parsing raises ValueError. Note that nothing is
    returned explicitly on success, so a valid string yields None
    rather than True; compare the result against False.
    Written by ShawnMilo
    https://gist.github.com/ShawnMilo/7777304#file-validate_uuid4-py
    """
    try:
        uuid.UUID(uuid_string, version=4)
    except ValueError:
        # The string is not a valid hex code for a UUID.
        outcome = False
    else:
        outcome = None
    return outcome
def get_source_uuid():
    '''
    Asks the user for the UUID of the source representation and keeps
    asking until validate_uuid4() stops rejecting the input.
    Returns the accepted string.
    '''
    while True:
        candidate = input(
            '\n\n**** Please enter the UUID of the source representation\n\n'
        )
        if validate_uuid4(candidate) is not False:
            return candidate
def ask_question(question):
    '''
    Asks user a question. Return answer.

    Empty answers are rejected. The answer then has to be confirmed via
    ask_yes_no(); when the user declines, the question is asked again
    so that the answer can be corrected. Previously a declined
    confirmation only repeated the confirmation itself.
    '''
    while True:
        answer = ''
        # Compare by value: 'is' on a string literal tests object
        # identity and only works by accident.
        while answer == '':
            answer = input(
                '\n\n**** %s\n\n'
                % question)
        proceed = ask_yes_no('Are you really sure?')
        if proceed.lower() != 'n':
            return answer
def get_object_entry():
    '''
    Asks user for an Object Entry number until a valid one is given.

    Accepted values are anything starting with 'scoe', or 'oe'
    followed by four or five digits. The accepted string is returned.
    '''
    rejection = ' - First two characters must be \'oe\' and last four characters must be four digits'
    while True:
        object_entry = input(
            '\n\n**** Please enter the object entry number of the representation\n\n'
        )
        if object_entry[:4] == 'scoe':
            return object_entry
        digits = object_entry[2:]
        is_valid = (
            object_entry[:2] == 'oe'
            and len(digits) in range(4, 6)
            and digits.isdigit()
        )
        if is_valid:
            return object_entry
        print(rejection)
def get_accession_number():
    '''
    Asks user for an accession number until a valid one is given.

    A valid number is 'aaa' followed by exactly four digits (aaa####).
    The accepted string is returned.
    '''
    rejection = ' - First three characters must be \'aaa\' and last four characters must be four digits'
    while True:
        accession_number = input(
            '\n\n**** Please enter the accession number of the representation\n\n'
        )
        digits = accession_number[3:]
        is_valid = (
            accession_number[:3] == 'aaa'
            and len(digits) == 4
            and digits.isdigit()
        )
        if is_valid:
            return accession_number
        print(rejection)
def get_reference_number():
    '''
    Asks user for a Filmographic reference number and returns it in
    upper case. Due to the variety of reference numbers, validation
    will be removed for now.
    '''
    prompt = '\n\n**** Please enter the Filmographic reference number of the representation- if there is more than one work that is represented, seperate them with an ampersand, eg af1234&aa675\n\n'
    return input(prompt).upper()
def get_contenttitletext(cpl):
'''
Returns the <ContentTitleText> element text from a DCP CPL.xml
'''