
Commit

sync with origin/main
KarelZe committed Jan 9, 2024
2 parents 30a83ff + 5ed3d56 commit a694fb8
Showing 20 changed files with 269 additions and 135 deletions.
19 changes: 10 additions & 9 deletions .github/workflows/build.yaml
@@ -15,7 +15,9 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install -r requirements.txt pyinstaller
python -m pip install .[dev] pyinstaller
- name: Lint code 🦄
run: tox -e lint
- name: Build binary 🔢
run: pyinstaller "main.spec"
- name: Run conversion ↩️
@@ -24,15 +26,14 @@
.\dist\ms_teams_parser.exe -f ".\forensicsim-data\john_doe_v_1_4_00_11161\IndexedDB\https_teams.microsoft.com_0.indexeddb.leveldb" -o "john_doe.json"
.\dist\ms_teams_parser.exe -f ".\forensicsim-data\karelze_v_23306_3309_2530_1346\IndexedDB\https_teams.live.com_0.indexeddb.leveldb" -o "karelze.json"
.\dist\ms_teams_parser.exe -f ".\forensicsim-data\mboufahja_v_23231_413_2355_7555\IndexedDB\https_teams.microsoft.com_0.indexeddb.leveldb" -o "mboufahja.json"
continue-on-error: false
- name: Test calling script 📞
run: |
python utils/dump_leveldb.py --help
python utils/dump_localstorage.py --help
python utils/dump_sessionstorage.py --help
continue-on-error: false
# python utils/populate_teams.py --help
# python utils/populate_teams_2.py --help
# python utils/populate_skype.py --help
python tools/main.py --help
python tools/dump_leveldb.py --help
python tools/dump_localstorage.py --help
python tools/dump_sessionstorage.py --help
# python utils/populate_teams.py --help
# python utils/populate_teams_2.py --help
# python utils/populate_skype.py --help
# - name: Calculate diff 👽
# run: git diff --no-index --word-diff expected_output/john_doe.json current_output.json
9 changes: 7 additions & 2 deletions .github/workflows/release.yaml
@@ -16,13 +16,18 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install -r requirements.txt pyinstaller
python -m pip install . pyinstaller
- name: Build binary
run: pyinstaller "main.spec"
- name: Zip files 🗜️
run: |
cp Forensicsim_Parser.py dist/
cd dist
tar.exe -a -cf forensicsim.zip Forensicsim_Parser.py ms_teams_parser.exe
- name: Upload artifacts to GitHub Release
env:
GITHUB_TOKEN: ${{ github.token }}
run: >-
gh release upload
'${{ github.ref_name }}' dist/ms_teams_parser.exe
'${{ github.ref_name }}' dist/forensicsim.zip
--repo '${{ github.repository }}'
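
The release job now copies the Autopsy plugin script next to the PyInstaller binary and packs both into `forensicsim.zip` before uploading the archive rather than the bare executable. A platform-neutral sketch of that packaging step using Python's standard `zipfile` module; this is an illustration only, since the workflow itself relies on Windows' `tar.exe`:

```python
# Sketch of the "Zip files" step without tar.exe.
import shutil
from pathlib import Path
from zipfile import ZIP_DEFLATED, ZipFile

dist = Path("dist")
shutil.copy("Forensicsim_Parser.py", dist)  # plugin script next to the binary

with ZipFile(dist / "forensicsim.zip", "w", compression=ZIP_DEFLATED) as zf:
    for name in ("Forensicsim_Parser.py", "ms_teams_parser.exe"):
        # keep the archive flat, like `tar.exe -a -cf forensicsim.zip file1 file2`
        zf.write(dist / name, arcname=name)
```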
3 changes: 3 additions & 0 deletions .pre-commit-config.yaml
@@ -17,5 +17,8 @@ repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.1.11
hooks:
- id: ruff
args:
- --fix
- id: ruff-format
exclude: "^(export|populationdata|testdata)"
12 changes: 5 additions & 7 deletions README.md
@@ -33,7 +33,7 @@ If you are curious about the artefacts that are generate by Microsoft Teams, I w
This module requires the installation of Autopsy v4.18 or above and a *Windows*-based system.

To install the *Microsoft Teams* parser for *Autopsy*, please follow these steps:
* Download the `.zip` folder and the `.exe` file of the latest available [release](https://github.com/lxndrblz/forensicsim/releases).
* Download the `forensicsim.zip` folder of the latest available [release](https://github.com/lxndrblz/forensicsim/releases).
* Extract the `.zip` folder onto your computer.
* Open the Windows File Explorer and navigate to your *Autopsy* Python plugin directory. By default, it is located under `%AppData%\autopsy\python_modules`.
* Create a new `forensicsim` folder within the `python_modules` folder.
@@ -121,7 +121,6 @@ as following. Simply specify the path to the database and where you want to outp
usage: dump_leveldb.py [-h] -f FILEPATH -o OUTPUTPATH
dump_leveldb.py: error: the following arguments are required: -f/--filepath, -o/--outputpath
```
---

@@ -131,24 +130,23 @@ dump_leveldb.py: error: the following arguments are required: -f/--filepath, -o/

A wee script for populating *Skype for Desktop* in a lab environment. The script can be used like this:

```
utils\populate_skype.py -a 0 -f conversation.json
```bash
tools\populate_skype.py -a 0 -f conversation.json
```

## populate_teams.py

A wee script for populating *Microsoft Teams* in a lab environment. The script can be used like this:

```
utils\populate_teams.py -a 0 -f conversation.json
```bash
tools\populate_teams.py -a 0 -f conversation.json
```

---
# Datasets
This repository comes with two datasets that allow reproducing the findings of this work. The `testdata` folder contains the *LevelDB* databases that have been extracted from two test clients. These can be used for benchmarking without having to perform a (lengthy) data population.

The `populationdata` contains *JSON* files of the communication that has been populated into the testing environment. These can be used to reproduce the experiment from scratch. However, for a rerun, it will be essential to adjust the dates to future dates, as the populator script relies on sufficient breaks between the individual messages.

---

# Acknowledgements & Thanks
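
The usage string above (`dump_leveldb.py [-h] -f FILEPATH -o OUTPUTPATH`) comes from the script's argument parser. Below is a minimal, purely hypothetical sketch of a matching Click-based command line. `click` is a declared dependency, but the real `tools/dump_leveldb.py` may be wired differently, and the body here only echoes its arguments instead of calling into `forensicsim.backend`:

```python
# Hypothetical sketch of a dump_leveldb.py-style entry point; the option
# names mirror the usage string shown in the README.
import click


@click.command()
@click.option("-f", "--filepath", type=click.Path(exists=True), required=True,
              help="Path to the IndexedDB LevelDB directory.")
@click.option("-o", "--outputpath", type=click.Path(), required=True,
              help="Where to write the dumped records as JSON.")
def cli(filepath: str, outputpath: str) -> None:
    # Placeholder body: the real script would parse the LevelDB with the
    # forensicsim backend and write JSON; here we only echo the arguments.
    click.echo(f"Would parse {filepath} and write records to {outputpath}")


if __name__ == "__main__":
    cli()
```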
5 changes: 0 additions & 5 deletions build.bat

This file was deleted.

2 changes: 1 addition & 1 deletion main.spec
@@ -3,7 +3,7 @@
block_cipher = None


a = Analysis(['utils\\main.py'],
a = Analysis(['tools\\main.py'],
binaries=[],
datas=[],
hiddenimports=[],
114 changes: 114 additions & 0 deletions pyproject.toml
@@ -0,0 +1,114 @@
[build-system]
requires = ["setuptools >= 61.0"]
build-backend = "setuptools.build_meta"


[project]
name = "forensicsim"
description = "A forensic open-source parser module for Autopsy that allows extracting the messages, comments, posts, contacts, calendar entries and reactions from a Microsoft Teams IndexedDB LevelDB database."
readme = "README.md"
license = {file = "LICENSE.md"}
requires-python = ">=3.9"
authors = [
{ name = "Alexander Bilz", email = "[email protected]" },
{ name = "Markus Bilz", email = "[email protected]" }
]
dependencies = [
"beautifulsoup4~=4.9.3",
"click~=8.0.1",
"chromedb @ git+https://github.com/karelze/ccl_chrome_indexeddb@master",
"pause~=0.3",
"pyautogui~=0.9.54",
"pywinauto~=0.6.8"
]

dynamic = ["version"]

[tool.setuptools.dynamic]
version = {attr = "forensicsim.__version__"}

[project.urls]
"Homepage" = "https://forensics.im/"
"Bug Tracker" = "https://github.com/lxndrblz/forensicsim/issues"

[project.optional-dependencies]
dev=[
"build",
"pre-commit",
"ruff",
"tox",
]


[tool.ruff]

target-version = "py39"

# See rules: https://beta.ruff.rs/docs/rules/
select = [
"C", # flake8-comprehensions
"F", # pyflakes
"FURB", # refurb
"I", # isort
"PIE", # misc lints
"PT", # pytest
"PGH", # pygrep
"RUF", # ruff-specific rules
"UP", # pyupgrade
"SIM", # flake8-simplify
]

include = ["*.py", "*.pyi", "**/pyproject.toml"]

ignore = [
"C901", # too complex
"E501", # line too long, handled by black
"D206", # indent with white space
"W191", # tab identation
]

[tool.ruff.lint]
preview = true
# exclude = ["tools/**.py"]

[tool.ruff.format]
preview = true

[tool.ruff.isort]
known-first-party = ["forensicsim"]
section-order = ["future", "standard-library", "third-party", "first-party", "local-folder"]

[tool.tox]
legacy_tox_ini = """
[tox]
envlist = format, lint, pre-commit
skipdist = True
isolated_build = True
[testenv]
deps = .[dev]
# Cleanup tasks
[testenv:clean]
commands =
sh -c "rm -rf build cover dist .hypothesis .mypy_cache .pytest_cache site"
# Auto Formatting
[testenv:format]
commands =
python -m ruff src tests --fix
python -m ruff format src
# Syntax Checks
[testenv:lint]
commands =
python -m ruff --output-format=github src
python -m ruff format src --check
# Pre-Commit
[testenv:pre-commit]
commands =
python -m pre-commit run --all-files --show-diff-on-failure
"""
8 changes: 0 additions & 8 deletions requirements.txt

This file was deleted.

1 change: 1 addition & 0 deletions src/forensicsim/__init__.py
@@ -0,0 +1 @@
__version__ = "0.5.3"
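
With `version = {attr = "forensicsim.__version__"}` under `[tool.setuptools.dynamic]` in `pyproject.toml`, setuptools reads the version string from this module at build time, so it only needs to be bumped in one place. A small sketch of how the two views of the version line up at runtime, assuming the package has been installed (for example with `pip install -e .`):

```python
# The attribute set in src/forensicsim/__init__.py and the version recorded
# in the installed distribution's metadata should agree.
from importlib.metadata import version

import forensicsim

print(forensicsim.__version__)  # "0.5.3"
print(version("forensicsim"))   # should print the same value
```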
35 changes: 18 additions & 17 deletions utils/shared.py → src/forensicsim/backend.py
@@ -29,10 +29,10 @@
from chromedb import (
ccl_blink_value_deserializer,
ccl_chromium_indexeddb,
ccl_v8_value_deserializer,
ccl_leveldb,
ccl_chromium_localstorage,
ccl_chromium_sessionstorage,
ccl_leveldb,
ccl_v8_value_deserializer,
)
from chromedb.ccl_chromium_indexeddb import (
DatabaseMetadataType,
@@ -77,19 +77,18 @@ def fetch_data(self):
if (
record.key.startswith(b"\x00\x00\x00\x00")
and record.state == ccl_leveldb.KeyState.Live
) and (
record.key not in global_metadata_raw
or global_metadata_raw[record.key].seq < record.seq
):
if (
record.key not in global_metadata_raw
or global_metadata_raw[record.key].seq < record.seq
):
global_metadata_raw[record.key] = record
global_metadata_raw[record.key] = record

# Convert the raw metadata to a nice GlobalMetadata Object
global_metadata = ccl_chromium_indexeddb.GlobalMetadata(global_metadata_raw)

# Loop through the database IDs
for db_id in global_metadata.db_ids:
if None == db_id.dbid_no:
if db_id.dbid_no == None:
continue

if db_id.dbid_no > 0x7F:
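
This hunk folds the nested `if` into the outer condition: a raw metadata record is kept only when its key is unseen or its sequence number is higher than the one stored so far, so the newest LevelDB version of each key wins. A toy sketch of that "highest seq wins" pattern, with a `NamedTuple` standing in for the `ccl_leveldb` record objects (an assumption made purely for illustration):

```python
# Toy illustration of the de-duplication logic in fetch_data(): for each key,
# keep only the record with the highest sequence number.
from typing import NamedTuple


class Record(NamedTuple):  # stand-in for ccl_leveldb's record objects
    key: bytes
    seq: int
    value: str


records = [
    Record(b"\x00\x00\x00\x00a", 1, "old"),
    Record(b"\x00\x00\x00\x00a", 7, "new"),
    Record(b"\x00\x00\x00\x00b", 3, "only"),
]

latest: dict[bytes, Record] = {}
for record in records:
    # Same shape as the merged condition: accept the record if the key is
    # unseen or this record has a higher sequence number than the stored one.
    if record.key not in latest or latest[record.key].seq < record.seq:
        latest[record.key] = record

print({key.hex(): rec.value for key, rec in latest.items()})
# -> the key ending in "a" maps to "new", the key ending in "b" to "only"
```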
@@ -130,9 +129,11 @@ def fetch_data(self):

meta_type = record.key[len(prefix_objectstore) + len(varint_raw)]

old_version = objectstore_metadata_raw.get(
(db_id.dbid_no, objstore_id, meta_type)
)
old_version = objectstore_metadata_raw.get((
db_id.dbid_no,
objstore_id,
meta_type,
))

if old_version is None or old_version.seq < record.seq:
objectstore_metadata_raw[
@@ -160,7 +161,7 @@ def iterate_records(self, do_not_filter=False):
# Loop through the databases and object stores based on their ids
for global_id in self.global_metadata.db_ids:
# print(f"Processing database: {global_id.name}")
if None == global_id.dbid_no:
if global_id.dbid_no == None:
print(f"WARNING: Skipping database {global_id.name}")
continue

@@ -188,7 +189,7 @@ def iterate_records(self, do_not_filter=False):
if record.value == b"":
continue
(
value_version,
_value_version,
varint_raw,
) = ccl_chromium_indexeddb.le_varint_from_bytes(
record.value
@@ -201,7 +202,7 @@
val_idx += 1

(
blink_version,
_blink_version,
varint_raw,
) = ccl_chromium_indexeddb.le_varint_from_bytes(
record.value[val_idx:]
@@ -226,7 +227,7 @@
"state": record.state,
"seq": record.seq,
}
except Exception as e:
except Exception:
# TODO Some proper error handling wouldn't hurt
continue
# print(f"{datastore} {global_id.name} {records_per_object_store}")
@@ -280,7 +281,7 @@ def write_results_to_json(data, outputpath):
json.dump(
data, f, indent=4, sort_keys=True, default=str, ensure_ascii=False
)
except EnvironmentError as e:
except OSError as e:
print(e)


@@ -290,5 +291,5 @@ def parse_json():
with open("teams.json") as json_file:
data = json.load(json_file)
return data
except EnvironmentError as e:
except OSError as e:
print(e)
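
The exception handlers switch from the legacy `EnvironmentError` alias to `OSError` (the two have been the same class since Python 3.3), and unused variables such as `value_version` and `blink_version` gain an underscore prefix. A short sketch of the JSON-writing pattern used by `write_results_to_json`, where `default=str` lets values that JSON cannot encode natively (such as datetimes) degrade to strings and `ensure_ascii=False` keeps Unicode message content readable; the file name below is illustrative:

```python
# Sketch of the write_results_to_json() pattern shown above.
import json
from datetime import datetime, timezone

data = [{"message": "hållo", "created": datetime.now(timezone.utc)}]

try:
    with open("records.json", "w", encoding="utf-8") as f:
        # default=str converts anything json cannot encode (e.g. datetime);
        # ensure_ascii=False writes non-ASCII characters as-is.
        json.dump(data, f, indent=4, sort_keys=True, default=str, ensure_ascii=False)
except OSError as e:  # EnvironmentError is a legacy alias for OSError
    print(e)
```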
File renamed without changes.