diff --git a/.github/dependabot.yaml b/.github/dependabot.yaml index 74217de..1cb4517 100644 --- a/.github/dependabot.yaml +++ b/.github/dependabot.yaml @@ -20,4 +20,4 @@ updates: interval: "daily" assignees: - "lxndrblz" - - "KarelZe" \ No newline at end of file + - "KarelZe" diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index ab6c93c..ae37477 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -25,4 +25,4 @@ jobs: run: >- gh release upload '${{ github.ref_name }}' dist/*exe - --repo '${{ github.repository }}' \ No newline at end of file + --repo '${{ github.repository }}' diff --git a/.gitignore b/.gitignore index 6b64ff1..34dd43e 100644 --- a/.gitignore +++ b/.gitignore @@ -128,4 +128,4 @@ dmypy.json .jython_cache/ venv/ -env/ \ No newline at end of file +env/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..5e29f1d --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,16 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: trailing-whitespace + - id: check-added-large-files + - id: check-byte-order-marker + - id: check-merge-conflict + - id: check-symlinks + - id: check-yaml + args: ['--unsafe'] + - id: debug-statements + - id: end-of-file-fixer + - id: mixed-line-ending diff --git a/CITATION.cff b/CITATION.cff index 575a7ed..17bb0d9 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -6,11 +6,11 @@ authors: affiliation: "Abertay University" orcid: "https://orcid.org/0000-0002-0692-2482" title: "Forensics.im Microsoft Teams Parser & Autopsy Plugin" -keywords: +keywords: - "Microsoft Teams" - Forensics - Electron abstract: "Autopsy Plugin for the Digital Forensic Acquisition and Analysis of Artefacts Generated by Microsoft Teams." version: 0.3.0 license: MIT -date-released: "2021-08-07" \ No newline at end of file +date-released: "2021-08-07" diff --git a/Forensicsim_Parser.py b/Forensicsim_Parser.py index 6912f0a..3a301b8 100644 --- a/Forensicsim_Parser.py +++ b/Forensicsim_Parser.py @@ -27,10 +27,10 @@ # Parses LevelDb's of Electron-based Microsoft Teams Desktop Client # May 2021 -# -# Comments +# +# Comments # Version 1.0 - Initial version - May 2021 -# +# import calendar import inspect diff --git a/README.md b/README.md index aeb343b..437978e 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ the binary *ldb* files, which contain the majority of the entries and allows ide messages and contacts, and presets these in Autopsy's blackboard view. This parser has been tested using: -* Microsoft Teams 1.4.00.11161 (Windows 10) with a free business organisation +* Microsoft Teams 1.4.00.11161 (Windows 10) with a free business organisation * Microsoft "Teams 2.0" (Windows 11) 48/21062133356 with a personal organisation This plugin is an artefact of the Master Thesis *Digital Forensic Acquisition and Analysis @@ -169,5 +169,4 @@ The `populationdata` contains *JSON* files of the communication that has been po - [ccl_chrome_indexeddb](https://github.com/cclgroupltd/ccl_chrome_indexeddb) Python module for enumerating the * LevelDB* artefacts without external dependencies. - [Gutenberg Project](https://www.gutenberg.org/files/1661/1661-0.txt) Part of Arthur Conan Doyle's book *The Adventures - of Sherlock Holmes* have been used for creating a natural conversation between the two demo accounts. - \ No newline at end of file + of Sherlock Holmes* have been used for creating a natural conversation between the two demo accounts. diff --git a/build.bat b/build.bat index 1c53143..7b22698 100644 --- a/build.bat +++ b/build.bat @@ -3,4 +3,3 @@ pyinstaller "main.spec" :: Copy the two files of interest into the Autopsy plugin directory - overwrite if necessary xcopy /y "dist\ms_teams_parser.exe" "%appdata%\autopsy\python_modules\forensicsim" xcopy /y "Forensicsim_Parser.py" "%appdata%\autopsy\python_modules\forensicsim" - diff --git a/export/jane_doe_raw.json b/export/jane_doe_raw.json index 164fedc..b791cb4 100644 --- a/export/jane_doe_raw.json +++ b/export/jane_doe_raw.json @@ -130268,4 +130268,4 @@ "userType": "Member" } } -] \ No newline at end of file +] diff --git a/export/john_doe_raw.json b/export/john_doe_raw.json index 3b3529c..3bbad86 100644 --- a/export/john_doe_raw.json +++ b/export/john_doe_raw.json @@ -144921,4 +144921,4 @@ "userType": "Member" } } -] \ No newline at end of file +] diff --git a/populationdata/README.md b/populationdata/README.md index 3db5341..d98982e 100644 --- a/populationdata/README.md +++ b/populationdata/README.md @@ -4,4 +4,4 @@ The files that were used for populating the clients can be found in this directo This eBook is for the use of anyone anywhere in the United States and most other parts of the world at no cost and with almost no restrictions whatsoever. You may copy it, give it away or re-use it under the terms of the Project Gutenberg License included with this eBook or online at [www.gutenberg.org](www.gutenberg.org). If you are not located in the United States, you will have to check the laws of the country where you are located before using this eBook. # Plain Vanilla ASCII -The plain-text full-text of the book can be retrieved free of charge from the project Gutenberg website under [www.gutenberg.org](https://gutenberg.org/files/1661/1661-0.txt). \ No newline at end of file +The plain-text full-text of the book can be retrieved free of charge from the project Gutenberg website under [www.gutenberg.org](https://gutenberg.org/files/1661/1661-0.txt). diff --git a/requirements.txt b/requirements.txt index 0a42b4a..329172b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,4 +3,4 @@ chardet~=4.0.0 pyfiglet~=0.8.post1 colorama~=0.4.4 beautifulsoup4~=4.9.3 -click~=8.0.1 \ No newline at end of file +click~=8.0.1 diff --git a/testdata/John Doe/IndexedDB/https_teams.microsoft.com_0.indexeddb.leveldb/LOG b/testdata/John Doe/IndexedDB/https_teams.microsoft.com_0.indexeddb.leveldb/LOG index c9b4c37..3296e80 100644 --- a/testdata/John Doe/IndexedDB/https_teams.microsoft.com_0.indexeddb.leveldb/LOG +++ b/testdata/John Doe/IndexedDB/https_teams.microsoft.com_0.indexeddb.leveldb/LOG @@ -1,6 +1,6 @@ 2021/06/01-11:08:22.749 1bc8 Reusing MANIFEST C:\Users\forensics\AppData\Roaming\Microsoft\Teams\IndexedDB\https_teams.microsoft.com_0.indexeddb.leveldb/MANIFEST-000001 2021/06/01-11:08:22.751 1bc8 Recovering log #80 -2021/06/01-11:08:22.755 1bc8 Reusing old log C:\Users\forensics\AppData\Roaming\Microsoft\Teams\IndexedDB\https_teams.microsoft.com_0.indexeddb.leveldb/000080.log +2021/06/01-11:08:22.755 1bc8 Reusing old log C:\Users\forensics\AppData\Roaming\Microsoft\Teams\IndexedDB\https_teams.microsoft.com_0.indexeddb.leveldb/000080.log 2021/06/01-11:34:29.465 878 Level-0 table #86: started 2021/06/01-11:34:29.550 878 Level-0 table #86: 1572363 bytes OK 2021/06/01-11:34:29.571 878 Delete type=0 #80 diff --git a/testdata/John Doe/IndexedDB/https_teams.microsoft.com_0.indexeddb.leveldb/LOG.old b/testdata/John Doe/IndexedDB/https_teams.microsoft.com_0.indexeddb.leveldb/LOG.old index 0afec1b..eb4e725 100644 --- a/testdata/John Doe/IndexedDB/https_teams.microsoft.com_0.indexeddb.leveldb/LOG.old +++ b/testdata/John Doe/IndexedDB/https_teams.microsoft.com_0.indexeddb.leveldb/LOG.old @@ -1,6 +1,6 @@ 2021/06/01-10:12:06.695 17ec Reusing MANIFEST C:\Users\forensics\AppData\Roaming\Microsoft\Teams\IndexedDB\https_teams.microsoft.com_0.indexeddb.leveldb/MANIFEST-000001 2021/06/01-10:12:06.697 17ec Recovering log #74 -2021/06/01-10:12:06.721 17ec Reusing old log C:\Users\forensics\AppData\Roaming\Microsoft\Teams\IndexedDB\https_teams.microsoft.com_0.indexeddb.leveldb/000074.log +2021/06/01-10:12:06.721 17ec Reusing old log C:\Users\forensics\AppData\Roaming\Microsoft\Teams\IndexedDB\https_teams.microsoft.com_0.indexeddb.leveldb/000074.log 2021/06/01-10:46:52.270 70 Level-0 table #81: started 2021/06/01-10:46:52.297 70 Level-0 table #81: 1637675 bytes OK 2021/06/01-10:46:52.299 70 Delete type=0 #74 diff --git a/utils/.gitignore b/utils/.gitignore index 3dd25e3..71f0b4c 100644 --- a/utils/.gitignore +++ b/utils/.gitignore @@ -138,4 +138,4 @@ dmypy.json cython_debug/ # Teams chat logs -sample/ \ No newline at end of file +sample/ diff --git a/utils/ccl_chrome_indexeddb/Chromium_dump_local_storage.py b/utils/ccl_chrome_indexeddb/Chromium_dump_local_storage.py index 9524685..0eacad4 100644 --- a/utils/ccl_chrome_indexeddb/Chromium_dump_local_storage.py +++ b/utils/ccl_chrome_indexeddb/Chromium_dump_local_storage.py @@ -31,7 +31,7 @@ DB_SCHEMA = """ CREATE TABLE storage_keys ("_id" INTEGER PRIMARY KEY AUTOINCREMENT, "storage_key" TEXT); -CREATE TABLE batches ("start_ldbseq" INTEGER PRIMARY KEY, +CREATE TABLE batches ("start_ldbseq" INTEGER PRIMARY KEY, "end_ldbseq" INTEGER, "storage_key" INTEGER, "timestamp" INTEGER); @@ -44,7 +44,7 @@ CREATE INDEX "storage_keys_storage_key" ON "storage_keys" ("storage_key"); CREATE VIEW "records_view" AS - SELECT + SELECT storage_keys.storage_key AS "storage_key", records."key" AS "key", records.value AS "value", @@ -57,7 +57,7 @@ """ INSERT_STORAGE_KEY_SQL = """INSERT INTO "storage_keys" ("storage_key") VALUES (?);""" -INSERT_BATCH_SQL = """INSERT INTO "batches" ("start_ldbseq", "end_ldbseq", "storage_key", "timestamp") +INSERT_BATCH_SQL = """INSERT INTO "batches" ("start_ldbseq", "end_ldbseq", "storage_key", "timestamp") VALUES (?, ?, ?, ?);""" INSERT_RECORD_SQL = """INSERT INTO "records" ("storage_key", "key", "value", "batch", "ldbseq") VALUES (?, ?, ?, ?, ?);""" diff --git a/utils/ccl_chrome_indexeddb/Chromium_dump_session_storage.py b/utils/ccl_chrome_indexeddb/Chromium_dump_session_storage.py index bd7d4af..d40799d 100644 --- a/utils/ccl_chrome_indexeddb/Chromium_dump_session_storage.py +++ b/utils/ccl_chrome_indexeddb/Chromium_dump_session_storage.py @@ -31,16 +31,16 @@ DB_SCHEMA = """ CREATE TABLE "hosts" ("_id" INTEGER PRIMARY KEY AUTOINCREMENT, "host" TEXT); CREATE TABLE "guids" ("_id" INTEGER PRIMARY KEY AUTOINCREMENT, "guid" TEXT); -CREATE TABLE "items" ("_id" INTEGER PRIMARY KEY AUTOINCREMENT, - "host" INTEGER, - "guid" INTEGER, - "ldbseq" INTEGER, - "key" TEXT, +CREATE TABLE "items" ("_id" INTEGER PRIMARY KEY AUTOINCREMENT, + "host" INTEGER, + "guid" INTEGER, + "ldbseq" INTEGER, + "key" TEXT, "value" TEXT); -CREATE INDEX "item_host" ON "items" ("host"); +CREATE INDEX "item_host" ON "items" ("host"); CREATE INDEX "item_ldbseq" ON "items" ("ldbseq"); -CREATE VIEW items_view AS +CREATE VIEW items_view AS SELECT "items"."ldbseq", "hosts"."host", "items"."key", diff --git a/utils/ccl_chrome_indexeddb/LICENSE b/utils/ccl_chrome_indexeddb/LICENSE index 96b30ac..8b9f05e 100644 --- a/utils/ccl_chrome_indexeddb/LICENSE +++ b/utils/ccl_chrome_indexeddb/LICENSE @@ -16,4 +16,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. \ No newline at end of file +SOFTWARE. diff --git a/utils/ccl_chrome_indexeddb/README.md b/utils/ccl_chrome_indexeddb/README.md index 69a26fe..5b65a84 100644 --- a/utils/ccl_chrome_indexeddb/README.md +++ b/utils/ccl_chrome_indexeddb/README.md @@ -1,5 +1,5 @@ # ccl_chrome_indexeddb -This repository contains (sometimes partial) re-implementations of the technologies involved in reading IndexedDB data +This repository contains (sometimes partial) re-implementations of the technologies involved in reading IndexedDB data in Chrome-esque applications. This includes: * Snappy decompression @@ -12,7 +12,7 @@ This includes: Read a blog on the subject here: https://www.cclsolutionsgroup.com/post/indexeddb-on-chromium ### Caveats -There is a fair amount of work yet to be done in terms of documentation, but +There is a fair amount of work yet to be done in terms of documentation, but the modules should be fine for pulling data out of IndexedDB, with the following caveats: @@ -33,12 +33,12 @@ me towards test data, I'd be very thankful! #### Cyclic references It is noted in the V8 source that recursive referencing is possible in the serialization, we're not yet accounting for that so if Python throws a -`RecursionError` that's likely what you're seeing. The plan is to use a +`RecursionError` that's likely what you're seeing. The plan is to use a similar approach to ccl_bplist where the collection types are subclassed and do Just In Time resolution of the items, but that isn't done yet. ## Using the modules -There are two methods for accessing records - a more pythonic API using a set of +There are two methods for accessing records - a more pythonic API using a set of wrapper objects and a raw API which doesn't mask the underlying workings. There is unlikely to be much benefit to using the raw API in most cases, so the wrapper objects are recommended in most cases. @@ -82,14 +82,14 @@ for record in obj_store.iterate_records(): with record.get_blob_stream(record.value["file"]) as f: file_data = f.read() -# By default, any errors in decoding records will bubble an exception +# By default, any errors in decoding records will bubble an exception # which might be painful when iterating records in a for-loop, so either -# passing True into the errors_to_stdout argument and/or by passing in an -# error handler function to bad_deserialization_data_handler, you can +# passing True into the errors_to_stdout argument and/or by passing in an +# error handler function to bad_deserialization_data_handler, you can # perform logging rather than crashing: for record in obj_store.iterate_records( - errors_to_stdout=True, + errors_to_stdout=True, bad_deserializer_data_handler= lambda k,v: print(f"error: {k}, {v}")): print(record.user_key) print(record.value) @@ -107,16 +107,16 @@ blob_folder_path = sys.argv[2] # open the database: db = ccl_chromium_indexeddb.IndexedDb(leveldb_folder_path, blob_folder_path) -# there can be multiple databases, so we need to iterate through them (NB +# there can be multiple databases, so we need to iterate through them (NB # DatabaseID objects contain additional metadata, they aren't just ints): for db_id_meta in db.global_metadata.db_ids: # and within each database, there will be multiple object stores so we # will need to know the maximum object store number (this process will be # cleaned up in future releases): max_objstore_id = db.get_database_metadata( - db_id_meta.dbid_no, + db_id_meta.dbid_no, ccl_chromium_indexeddb.DatabaseMetadataType.MaximumObjectStoreId) - + # if the above returns None, then there are no stores in this db if max_objstore_id is None: continue @@ -137,4 +137,3 @@ for db_id_meta in db.global_metadata.db_ids: with record.get_blob_stream(record.value["file"]) as f: file_data = f.read() ``` - diff --git a/utils/ccl_chrome_indexeddb/ccl_chromium_localstorage.py b/utils/ccl_chrome_indexeddb/ccl_chromium_localstorage.py index 43e6e17..520630a 100644 --- a/utils/ccl_chrome_indexeddb/ccl_chromium_localstorage.py +++ b/utils/ccl_chrome_indexeddb/ccl_chromium_localstorage.py @@ -39,11 +39,11 @@ Meta keys: Key = "META:" + storage_key (the host) Value = protobuff: 1=timestamp (varint); 2=size in bytes (varint) - + Record keys: Key = "_" + storage_key + "\\x0" + script_key Value = record_value - + """ _META_PREFIX = b"META:" @@ -315,4 +315,3 @@ def main(args): if __name__ == '__main__': main(sys.argv[1:]) -