diff --git a/.github/workflows/auto-assign-pr.yml b/.github/workflows/auto-assign-pr.yml new file mode 100644 index 0000000..08c2361 --- /dev/null +++ b/.github/workflows/auto-assign-pr.yml @@ -0,0 +1,15 @@ +# .github/workflows/auto-assign-pr.yml +name: 'Auto Author Assign' + +on: + pull_request_target: + types: [opened, reopened] + +permissions: + pull-requests: write + +jobs: + assign-author: + runs-on: ubuntu-latest + steps: + - uses: toshimaru/auto-author-assign@v1.6.2 diff --git a/.github/workflows/pre-commit-actions.yml b/.github/workflows/pre-commit-actions.yml new file mode 100644 index 0000000..3693520 --- /dev/null +++ b/.github/workflows/pre-commit-actions.yml @@ -0,0 +1,37 @@ +name: precommit-actions +run-name: ${{ github.actor }} is running precommit actions. +on: + push: + branches-ignore: [main] + pull_request: + branches: [main] +jobs: + build: + name: Lint + runs-on: ubuntu-latest + + permissions: + contents: read + packages: read + # To report GitHub Actions status checks + statuses: write + + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + # super-linter needs the full git history to get the + # list of files that changed across commits + fetch-depth: 0 + + - name: Super-linter + uses: super-linter/super-linter@v6.5.1 # x-release-please-version + env: + DEFAULT_BRANCH: main + # To report GitHub Actions status checks + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + IGNORE_GITIGNORED_FILES: true + VALIDATE_ALL_CODEBASE: true + VALIDATE_PYTHON_FLAKE8: true + VALIDATE_HTML: true + FILTER_REGEX_EXCLUDE: '^tests/test_files/' diff --git a/.github/workflows/semantic-pr.yml b/.github/workflows/semantic-pr.yml new file mode 100644 index 0000000..d93a930 --- /dev/null +++ b/.github/workflows/semantic-pr.yml @@ -0,0 +1,32 @@ +name: "Semantic PRs" + +on: + pull_request_target: + types: + - opened + - edited + - synchronize + +jobs: + main: + name: Validate PR title + runs-on: ubuntu-latest + steps: + - uses: 
amannn/action-semantic-pull-request@v5 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + # first type as the one from https://github.com/commitizen/conventional-commit-types/blob/master/index.json + # l10n (localization) is for translations updates + types: | + feat + fix + docs + style + refactor + test + build + ci + chore + revert + l10n diff --git a/.gitignore b/.gitignore index 68bc17f..2dc53ca 100644 --- a/.gitignore +++ b/.gitignore @@ -157,4 +157,4 @@ cython_debug/ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. -#.idea/ +.idea/ diff --git a/README.md b/README.md index 19855d7..c7d3da0 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,8 @@ A command-line tool designed to detect and purge any and all macros and dynamic content from commonly used office document formats (including MS Office Files, PDFs, etc.). +[![Super-Linter](https://github.com/Anti-Malware-Alliance/ms-office-macro-bleach/actions/workflows/pre-commit-actions.yml/badge.svg)](https://github.com/marketplace/actions/super-linter) + ## Supported formats ### Word diff --git a/docubleach/bleach.py b/docubleach/bleach.py index 9752246..35ce323 100644 --- a/docubleach/bleach.py +++ b/docubleach/bleach.py @@ -1,16 +1,21 @@ """This module is designed to purge any and all macros and dynamic content from commonly used office formats. -VBA and OLE content in MS Office files can, and have sometimes been made to, act as vehicles for malware delivery. +VBA and OLE content in MS Office files can, and have sometimes been made to, +act as vehicles for malware delivery. -Microsoft has previously attempted to protect users from macros by disabling them by default. +Microsoft has previously attempted to protect users from macros by disabling +them by default. 
-However, anybody is able to enable macros in an MS Office file before sending them on to a potential victim. +However, anybody is able to enable macros in an MS Office file before sending +them on to a potential victim. -This module enables users to simply and safely remove any and all macros/dynamic content from MS Office files. +This module enables users to simply and safely remove any and all +macros/dynamic content from MS Office files. It is part of a suite of programs developed by the AntiMalware Alliance. -Visit https://github.com/Anti-Malware-Alliance for more details about our organisation and projects. +Visit https://github.com/Anti-Malware-Alliance for more details +about our organisation and projects. """ from argparse import ArgumentParser from os import rename, path, remove @@ -19,7 +24,6 @@ from shutil import make_archive, rmtree from olefile import OleFileIO - ooxml_formats = [ "docx", "docm", @@ -46,7 +50,6 @@ bff_formats = [ "doc", - #"ppt", "xls", ] @@ -151,7 +154,8 @@ def validate_file(file): def main(): parser = ArgumentParser() parser.add_argument("file", help="file to be bleached") - parser.add_argument("-c", help="notify if macros or potentially dangerous content is found", action="store_true") + parser.add_argument("-c", help="notify if macros or potentially dangerous " + "content is found", action="store_true") args = parser.parse_args() if validate_file(args.file): diff --git a/pyproject.toml b/pyproject.toml index f3d56bc..64a6f88 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,3 +18,6 @@ docubleach = "docubleach.bleach:main" [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" + +[tool.flake8] +max-line-length = 95 diff --git a/tests/test_ooxml_files.py b/tests/test_ooxml_files.py index 9ff3918..7141cdf 100644 --- a/tests/test_ooxml_files.py +++ b/tests/test_ooxml_files.py @@ -25,7 +25,8 @@ def test_word_document(): def test_word_document_with_macros(): 
copyfile("test_files/word_document_with_macros.docm", "test_files/word_document_with_macros.docm.bak") - output = check_output(f"python {program_dir}bleach.py test_files/word_document_with_macros.docm -c", encoding='utf-8') + output = check_output(f"python {program_dir}bleach.py test_files/word_document_with_macros.docm -c", + encoding='utf-8') remove("test_files/word_document_with_macros.docm") rename("test_files/word_document_with_macros.docm.bak", "test_files/word_document_with_macros.docm") @@ -42,7 +43,8 @@ def test_word_template(): def test_word_template_with_macros(): copyfile("test_files/word_template_with_macros.dotm", "test_files/word_template_with_macros.dotm.bak") - output = check_output(f"python {program_dir}bleach.py test_files/word_template_with_macros.dotm -c", encoding='utf-8') + output = check_output(f"python {program_dir}bleach.py test_files/word_template_with_macros.dotm -c", + encoding='utf-8') remove("test_files/word_template_with_macros.dotm") rename("test_files/word_template_with_macros.dotm.bak", "test_files/word_template_with_macros.dotm") @@ -51,41 +53,50 @@ def test_word_template_with_macros(): def test_powerpoint_presentation(): - output = check_output(f"python {program_dir}bleach.py test_files/powerpoint_presentation.pptx -c", encoding='utf-8') + output = check_output(f"python {program_dir}bleach.py test_files/powerpoint_presentation.pptx -c", + encoding='utf-8') assert output == "" def test_powerpoint_presentation_with_macros(): - copyfile("test_files/powerpoint_presentation_with_macros.pptm", "test_files/powerpoint_presentation_with_macros.pptm.bak") + copyfile("test_files/powerpoint_presentation_with_macros.pptm", + "test_files/powerpoint_presentation_with_macros.pptm.bak") - output = check_output(f"python {program_dir}bleach.py test_files/powerpoint_presentation_with_macros.pptm -c", encoding='utf-8') + output = check_output(f"python {program_dir}bleach.py test_files/powerpoint_presentation_with_macros.pptm -c", + encoding='utf-8') 
remove("test_files/powerpoint_presentation_with_macros.pptm") - rename("test_files/powerpoint_presentation_with_macros.pptm.bak", "test_files/powerpoint_presentation_with_macros.pptm") + rename("test_files/powerpoint_presentation_with_macros.pptm.bak", + "test_files/powerpoint_presentation_with_macros.pptm") assert output == "Macros detected and removed.\n" def test_powerpoint_template(): - output = check_output(f"python {program_dir}bleach.py test_files/powerpoint_template.potx -c", encoding='utf-8') + output = check_output(f"python {program_dir}bleach.py test_files/powerpoint_template.potx -c", + encoding='utf-8') assert output == "" def test_powerpoint_template_with_macros(): - copyfile("test_files/powerpoint_template_with_macros.potm", "test_files/powerpoint_template_with_macros.potm.bak") + copyfile("test_files/powerpoint_template_with_macros.potm", + "test_files/powerpoint_template_with_macros.potm.bak") - output = check_output(f"python {program_dir}bleach.py test_files/powerpoint_template_with_macros.potm -c", encoding='utf-8') + output = check_output(f"python {program_dir}bleach.py test_files/powerpoint_template_with_macros.potm -c", + encoding='utf-8') remove("test_files/powerpoint_template_with_macros.potm") - rename("test_files/powerpoint_template_with_macros.potm.bak", "test_files/powerpoint_template_with_macros.potm") + rename("test_files/powerpoint_template_with_macros.potm.bak", + "test_files/powerpoint_template_with_macros.potm") assert output == "Macros detected and removed.\n" def test_powerpoint_show(): - output = check_output(f"python {program_dir}bleach.py test_files/powerpoint_show.ppsx -c", encoding='utf-8') + output = check_output(f"python {program_dir}bleach.py test_files/powerpoint_show.ppsx -c", + encoding='utf-8') assert output == "" @@ -93,7 +104,8 @@ def test_powerpoint_show(): def test_powerpoint_show_with_macros(): copyfile("test_files/powerpoint_show_with_macros.ppsm", "test_files/powerpoint_show_with_macros.ppsm.bak") - output = 
check_output(f"python {program_dir}bleach.py test_files/powerpoint_show_with_macros.ppsm -c", encoding='utf-8') + output = check_output(f"python {program_dir}bleach.py test_files/powerpoint_show_with_macros.ppsm -c", + encoding='utf-8') remove("test_files/powerpoint_show_with_macros.ppsm") rename("test_files/powerpoint_show_with_macros.ppsm.bak", "test_files/powerpoint_show_with_macros.ppsm") @@ -102,15 +114,18 @@ def test_powerpoint_show_with_macros(): def test_excel_spreadsheet(): - output = check_output(f"python {program_dir}bleach.py test_files/excel_spreadsheet.xlsx -c", encoding='utf-8') + output = check_output(f"python {program_dir}bleach.py test_files/excel_spreadsheet.xlsx -c", + encoding='utf-8') assert output == "" def test_excel_spreadsheet_with_macros(): - copyfile("test_files/excel_spreadsheet_with_macros.xlsm", "test_files/excel_spreadsheet_with_macros.xlsm.bak") + copyfile("test_files/excel_spreadsheet_with_macros.xlsm", + "test_files/excel_spreadsheet_with_macros.xlsm.bak") - output = check_output(f"python {program_dir}bleach.py test_files/excel_spreadsheet_with_macros.xlsm -c", encoding='utf-8') + output = check_output(f"python {program_dir}bleach.py test_files/excel_spreadsheet_with_macros.xlsm -c", + encoding='utf-8') remove("test_files/excel_spreadsheet_with_macros.xlsm") rename("test_files/excel_spreadsheet_with_macros.xlsm.bak", "test_files/excel_spreadsheet_with_macros.xlsm") @@ -127,7 +142,8 @@ def test_excel_template(): def test_excel_template_with_macros(): copyfile("test_files/excel_template_with_macros.xltm", "test_files/excel_template_with_macros.xltm.bak") - output = check_output(f"python {program_dir}bleach.py test_files/excel_template_with_macros.xltm -c", encoding='utf-8') + output = check_output(f"python {program_dir}bleach.py test_files/excel_template_with_macros.xltm -c", + encoding='utf-8') remove("test_files/excel_template_with_macros.xltm") rename("test_files/excel_template_with_macros.xltm.bak", 
"test_files/excel_template_with_macros.xltm")