diff --git a/docubleach/bleach.py b/docubleach/bleach.py index 44a13b4..748d50d 100644 --- a/docubleach/bleach.py +++ b/docubleach/bleach.py @@ -73,7 +73,29 @@ def unzip_file(file): def detect_macros(file): - return VBA_Parser(file).detect_macros() + file_type = file.split(".")[-1].lower() + + if file_type in bff_formats: + with OleFileIO(file, write_mode=False) as ole: + streams = ole.listdir(streams=True) + macro_streams = [] + + for stream in streams: + if stream[0] in bff_macro_folders: + macro_streams.append(stream) + + # Check each macro stream to see if it's empty (bleached) + for macro_stream in macro_streams: + macro_stream_size = ole.get_size(macro_stream) + stream_contents = ole.openstream(macro_stream).read(macro_stream_size) + stream_bytes = bytes(bytearray(stream_contents)) + + if stream_bytes != bytes(bytearray(macro_stream_size)): + return True + + return False + else: + return VBA_Parser(file).detect_macros() def remove_macros(file, notify=False): @@ -93,22 +115,23 @@ def remove_bff_macros(file, notify): macros_found = False if file_type == "doc" or file_type == "xls": - streams = OleFileIO(file).listdir(streams=True) - macro_streams = [] - - for stream in streams: - if stream[0] in bff_macro_folders: - macro_streams.append(stream) - - ole = OleFileIO(file, write_mode=True) - - for macro_stream in macro_streams: - macro_stream_size = ole.get_size(macro_stream) - ole.write_stream(macro_stream, bytes(bytearray(macro_stream_size))) - ole.close() - - if len(macro_streams) > 0: - macros_found = True + with OleFileIO(file, write_mode=True) as ole: + streams = ole.listdir(streams=True) + macro_streams = [] + + for stream in streams: + if stream[0] in bff_macro_folders: + macro_streams.append(stream) + + for macro_stream in macro_streams: + macro_stream_size = ole.get_size(macro_stream) + macro_stream_contents = ole.openstream(macro_stream).read(macro_stream_size) + + # Check each macro stream to see if it's already empty (bleached) + if 
macro_stream_contents != bytes(bytearray(macro_stream_size)): + # Replace macro stream contents with empty bytes + ole.write_stream(macro_stream, bytes(bytearray(macro_stream_size))) + macros_found = True if file_type == "ppt": streams = OleFileIO(file).listdir(streams=True) diff --git a/tests/test_bff_files.py b/tests/test_bff_files.py new file mode 100644 index 0000000..8a3fee8 --- /dev/null +++ b/tests/test_bff_files.py @@ -0,0 +1,48 @@ +"""These tests are designed to test the tool's compatibility with supported BFF file formats. + +Each test runs the 'bleach' on its respective file. + +The 'bleached' files are then scanned for macros using the 'detect_macros' function. + +Valid files containing macros are restored to their original form after testing to ensure test repeatability. + +All tests are written for and conducted using pytest. +""" +from os import listdir, remove, rename +from shutil import copyfile + +from docubleach.bleach import detect_macros, remove_macros + + +test_dir = "tests/test_files/bff_files/" + + +def setup_module(): + for file in listdir(test_dir): + copyfile(test_dir + file, test_dir + file + ".bak") + + +def teardown_module(): + for file in listdir(test_dir): + if file.split(".")[-1] != "bak": + remove(test_dir + file) + + for file in listdir(test_dir): + if file.split(".")[-1] == "bak": + rename(test_dir + file, test_dir + file[:-4]) + + +def test_legacy_word_document(): + test_file = f"{test_dir}legacy_word_document.doc" + + remove_macros(test_file) + + assert detect_macros(test_file) is False + + +def test_legacy_excel_spreadsheet(): + test_file = f"{test_dir}legacy_excel_spreadsheet.xls" + + remove_macros(test_file) + + assert detect_macros(test_file) is False diff --git a/tests/test_files/bff_files/legacy_excel_spreadsheet.xls b/tests/test_files/bff_files/legacy_excel_spreadsheet.xls new file mode 100644 index 0000000..884a95d Binary files /dev/null and b/tests/test_files/bff_files/legacy_excel_spreadsheet.xls differ diff --git 
a/tests/test_files/bff_files/legacy_word_document.doc b/tests/test_files/bff_files/legacy_word_document.doc new file mode 100644 index 0000000..257b08b Binary files /dev/null and b/tests/test_files/bff_files/legacy_word_document.doc differ