Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add BFF tests and updated detection #34

Merged
merged 2 commits into from
Oct 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 40 additions & 17 deletions docubleach/bleach.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,29 @@ def unzip_file(file):


def detect_macros(file):
    """Report whether *file* still contains macro content.

    Files whose extension is listed in ``bff_formats`` are opened read-only
    as OLE containers. Every stream whose top-level storage name appears in
    ``bff_macro_folders`` is compared against an all-zero buffer of the same
    size; a stream that differs is treated as a live (un-bleached) macro.
    Every other file type is delegated to ``VBA_Parser.detect_macros``.

    Args:
        file: Path to the document. The file type is taken from the text
            after the last "." in the path, case-insensitively.

    Returns:
        True if macro content was found, False otherwise.
    """
    file_type = file.split(".")[-1].lower()

    if file_type not in bff_formats:
        parser = VBA_Parser(file)
        try:
            return parser.detect_macros()
        finally:
            # Release the underlying file handle so callers can move or
            # delete the file afterwards.
            parser.close()

    with OleFileIO(file, write_mode=False) as ole:
        for stream in ole.listdir(streams=True):
            if stream[0] not in bff_macro_folders:
                continue

            stream_size = ole.get_size(stream)
            stream_contents = ole.openstream(stream).read(stream_size)

            # A bleached stream is exactly `stream_size` zero bytes; anything
            # else (including a short read) means macro data is still there.
            if stream_contents != bytes(stream_size):
                return True

    return False


def remove_macros(file, notify=False):
Expand All @@ -93,22 +115,23 @@ def remove_bff_macros(file, notify):
macros_found = False

if file_type == "doc" or file_type == "xls":
streams = OleFileIO(file).listdir(streams=True)
macro_streams = []

for stream in streams:
if stream[0] in bff_macro_folders:
macro_streams.append(stream)

ole = OleFileIO(file, write_mode=True)

for macro_stream in macro_streams:
macro_stream_size = ole.get_size(macro_stream)
ole.write_stream(macro_stream, bytes(bytearray(macro_stream_size)))
ole.close()

if len(macro_streams) > 0:
macros_found = True
with OleFileIO(file, write_mode=True) as ole:
streams = ole.listdir(streams=True)
macro_streams = []

for stream in streams:
if stream[0] in bff_macro_folders:
macro_streams.append(stream)

for macro_stream in macro_streams:
macro_stream_size = ole.get_size(macro_stream)
macro_stream_contents = ole.openstream(macro_stream).read(macro_stream_size)

# Check each macro stream to see if it's already empty (bleached)
if macro_stream_contents != bytes(bytearray(macro_stream_size)):
# Replace macro stream contents with empty bytes
ole.write_stream(macro_stream, bytes(bytearray(macro_stream_size)))
macros_found = True

if file_type == "ppt":
streams = OleFileIO(file).listdir(streams=True)
Expand Down
48 changes: 48 additions & 0 deletions tests/test_bff_files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
"""These tests are designed to test the tool's compatibility with supported BFF file formats.

Each test runs the 'bleach' on its respective file.

The 'bleached' files are then scanned for macros using the 'detect_macros' function.

Valid files containing macros are restored to their original form after testing to ensure test repeatability.

All tests are written for and conducted using pytest.
"""
from os import listdir, remove, rename
from shutil import copyfile

from docubleach.bleach import detect_macros, remove_macros


# Directory holding the BFF fixture files used by this module. The trailing
# slash is required: paths are built by plain string concatenation
# (test_dir + file name). The path is relative — presumably pytest is run
# from the repository root; TODO confirm.
test_dir = "tests/test_files/bff_files/"


def setup_module():
    """Back up every fixture in ``test_dir`` before the tests modify it.

    Each fixture ``name`` gets a sibling copy ``name.bak``. If a backup is
    already present (left over from a run that was interrupted before
    teardown), it is treated as the pristine copy: the fixture is restored
    from it instead of the backup being overwritten with a possibly
    already-bleached file, and no ``.bak.bak`` file is created.
    """
    from os.path import exists

    backup_suffix = ".bak"

    # Collapse "x.doc" and a stale "x.doc.bak" into the single fixture "x.doc".
    fixture_names = {
        entry[:-len(backup_suffix)] if entry.endswith(backup_suffix) else entry
        for entry in listdir(test_dir)
    }

    for name in sorted(fixture_names):
        fixture_path = test_dir + name
        backup_path = fixture_path + backup_suffix

        if exists(backup_path):
            # Stale backup from an aborted run: it holds the original content.
            copyfile(backup_path, fixture_path)
        else:
            copyfile(fixture_path, backup_path)


def teardown_module():
    """Restore the fixtures in ``test_dir`` from their ``.bak`` copies.

    Runs in two passes: first every entry that is not a backup is deleted,
    then every backup is renamed back to its original name (the last four
    characters, i.e. the ".bak" suffix, are dropped).
    """
    # Pass 1: discard the (modified) working copies.
    for entry in listdir(test_dir):
        if entry.rpartition(".")[2] == "bak":
            continue
        remove(test_dir + entry)

    # Pass 2: promote each backup to its original file name.
    backups = [
        entry for entry in listdir(test_dir)
        if entry.rpartition(".")[2] == "bak"
    ]
    for backup in backups:
        restored_name = backup[:-4]
        rename(test_dir + backup, test_dir + restored_name)


def test_legacy_word_document():
    """Bleach the legacy Word fixture and check no macros are detected after."""
    document_path = test_dir + "legacy_word_document.doc"

    remove_macros(document_path)
    macros_remaining = detect_macros(document_path)

    assert macros_remaining is False


def test_legacy_excel_spreadsheet():
    """Bleach the legacy Excel fixture and check no macros are detected after."""
    spreadsheet_path = test_dir + "legacy_excel_spreadsheet.xls"

    remove_macros(spreadsheet_path)
    macros_remaining = detect_macros(spreadsheet_path)

    assert macros_remaining is False
Binary file not shown.
Binary file not shown.