
Commit

feat: added comments, argparse short codes, deduplication for meetings
lxndrblz committed Jun 29, 2021
1 parent 596edcc commit ee3d9e7
Showing 3 changed files with 85 additions and 57 deletions.
4 changes: 2 additions & 2 deletions utils/dump_leveldb.py
@@ -32,8 +32,8 @@ def parse_cmdline():
description = 'Forensics.im Dump Tool'
parser = argparse.ArgumentParser(description=description)
required_group = parser.add_argument_group('required arguments')
required_group.add_argument('--filepath', required=True, help='File path to the IndexedDB.')
required_group.add_argument('--outputpath', required=True, help='File path to the processed output.')
required_group.add_argument('-f', '--filepath', required=True, help='File path to the IndexedDB.')
required_group.add_argument('-o', '--outputpath', required=True, help='File path to the processed output.')
args = parser.parse_args()
return args
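
Both spellings resolve to the same argparse destination, so existing calls with the long flags keep working; a minimal standalone sketch with placeholder paths:

import argparse

parser = argparse.ArgumentParser(description='Forensics.im Dump Tool')
required_group = parser.add_argument_group('required arguments')
required_group.add_argument('-f', '--filepath', required=True, help='File path to the IndexedDB.')
required_group.add_argument('-o', '--outputpath', required=True, help='File path to the processed output.')

# '-f' and '--filepath' share the destination 'filepath', so either form works.
args = parser.parse_args(['-f', '/tmp/IndexedDB', '-o', '/tmp/out'])
assert args.filepath == '/tmp/IndexedDB' and args.outputpath == '/tmp/out'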

128 changes: 74 additions & 54 deletions utils/main.py
@@ -46,11 +46,15 @@ def extract_fields(record, keys):
def parse_contacts(contacts):
cleaned = []
for contact in contacts:
value = contact['value']
x = extract_fields(value, 'contact')
x['origin_file'] = contact['origin_file']
x['record_type'] = 'contact'
cleaned.append(x)
try:
value = contact['value']
x = extract_fields(value, 'contact')
x['origin_file'] = contact['origin_file']
x['record_type'] = 'contact'
cleaned.append(x)
except (UnicodeDecodeError, KeyError):
print("Could not decode contact.")
print(contact)
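
One subtlety worth noting about the exception clause: Python evaluates "except UnicodeDecodeError or KeyError" as "except UnicodeDecodeError" (the "or" expression short-circuits), so the tuple form is required to catch both. A quick self-contained check:

# Only the tuple form catches both exception types; 'A or B' evaluates to just A.
try:
    {}['missing']                        # raises KeyError
except (UnicodeDecodeError, KeyError):
    print('caught')                      # reached only with the tuple form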

# Deduplicate based on mri - should be unique anyway
cleaned = deduplicate(cleaned, 'mri')
@@ -65,42 +69,49 @@ def parse_reply_chain(reply_chains):
message = value['messages']
for key, value in message.items():
# parse as a normal chat message

x = extract_fields(value, 'message')
x['origin_file'] = reply_chain['origin_file']
# Files send without any description will be of type text
if x['messagetype'] == 'RichText/Html' or x['messagetype'] == 'Text':
# Get the call logs
if 'call-log' in x['properties']:
# call logs are string escaped
x['properties']['call-log'] = json.loads(value['properties']['call-log'])
x['record_type'] = 'call'
# Get the reactions from the chat
elif 'activity' in x['properties']:
# reactionInChat are for personal conversations, reactions are for posts or comments
if x['properties']['activity']['activityType'] == 'reactionInChat' or 'reaction':
x['record_type'] = 'reaction'
# normal message, posts, file transfers
else:
x['content'] = strip_html_tags(x['content'])
x['record_type'] = 'message'

# handle string escaped json arrays within properties
if 'links' in x['properties']:
x['properties']['links'] = json.loads(x['properties']['links'])
if 'files' in x['properties']:
x['properties']['files'] = json.loads(x['properties']['files'])
# convert the timestamps
x['createdTime'] = convert_time_stamps(x['createdTime'])
x['version'] = convert_time_stamps(x['version'])
# manually construct the cachedDeduplicationKey, because not every replychain appears to have it
x['cachedDeduplicationKey'] = str(x['creator']+x['clientmessageid'])
cleaned.append(x)
# Other types include ThreadActivity/TopicUpdate and ThreadActivity/AddMember
# -> ThreadActivity/TopicUpdate occurs for meeting updates
# -> ThreadActivity/AddMember occurs when someone gets added to a chat

# Deduplicate
try:
x = extract_fields(value, 'message')
x['origin_file'] = reply_chain['origin_file']
# Files sent without any description will be of type Text
if x['messagetype'] == 'RichText/Html' or x['messagetype'] == 'Text':
# Get the call logs
if 'call-log' in x['properties']:
# call logs are string escaped
x['properties']['call-log'] = json.loads(value['properties']['call-log'])
x['record_type'] = 'call'
# Get the reactions from the chat
elif 'activity' in x['properties']:
# reactionInChat are for personal conversations, reactions are for posts or comments
if x['properties']['activity']['activityType'] in ('reactionInChat', 'reaction'):
x['record_type'] = 'reaction'
# normal message, posts, file transfers
else:
x['content'] = strip_html_tags(x['content'])
x['record_type'] = 'message'

# handle string escaped json arrays within properties
if 'links' in x['properties']:
x['properties']['links'] = json.loads(x['properties']['links'])
if 'files' in x['properties']:
x['properties']['files'] = json.loads(x['properties']['files'])
# convert the timestamps
x['createdTime'] = convert_time_stamps(x['createdTime'])
x['version'] = convert_time_stamps(x['version'])
# manually construct the cachedDeduplicationKey, because not every reply chain appears to have this key.
# cachedDeduplicationKeys look like 8:orgid:54dd27a7-fbb0-4bf0-8208-a4b31a578a3f6691174965251523000
# They are composed of:
# -> the creator 8:orgid:54dd27a7-fbb0-4bf0-8208-a4b31a578a3f
# -> the clientmessageid 6691174965251523000
if x['creator'] is not None and x['clientmessageid'] is not None:
x['cachedDeduplicationKey'] = str(x['creator']+x['clientmessageid'])
cleaned.append(x)
# Other types include ThreadActivity/TopicUpdate and ThreadActivity/AddMember
# -> ThreadActivity/TopicUpdate occurs for meeting updates
# -> ThreadActivity/AddMember occurs when someone gets added to a chat
except (UnicodeDecodeError, KeyError):
print("Could not decode reply chain.")
print(reply_chain)
# Deduplicate based on cachedDeduplicationKey, as messages often appear multiple times within the reply chains
cleaned = deduplicate(cleaned, 'cachedDeduplicationKey')
return cleaned
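
The deduplicate helper itself is not part of this diff; below is a minimal sketch of what key-based deduplication could look like, assuming the deduplicate(records, key) signature used above (the actual implementation may differ):

def deduplicate(records, key):
    # Keep the first record seen per key value; records without the key are kept as-is.
    seen = set()
    unique = []
    for record in records:
        value = record.get(key)
        if value is None:
            unique.append(record)
        elif value not in seen:
            seen.add(value)
            unique.append(record)
    return unique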

@@ -113,24 +124,33 @@ def parse_conversations(conversations):
x = extract_fields(value, 'conversation')
# Include file origin for records
x['origin_file'] = conversation['origin_file']
if x['type'] == 'Meeting':
# assign the type for further processing as the object store might not be sufficient
if 'threadProperties' in x:
if 'meeting' in x['threadProperties']:
x['threadProperties']['meeting'] = json.loads(x['threadProperties']['meeting'])
x['record_type'] = 'meeting'
cleaned.append(x)
except UnicodeDecodeError:
# First, make sure that the conversation has a cachedDeduplicationKey
if 'lastMessage' in conversation['value']:
if 'cachedDeduplicationKey' in conversation['value']['lastMessage']:
x['cachedDeduplicationKey'] = conversation['value']['lastMessage']['cachedDeduplicationKey']
# we are only interested in meetings for now
if x['type'] == 'Meeting':
# assign the type for further processing as the object store might not be sufficient
if 'threadProperties' in x:
if 'meeting' in x['threadProperties']:
x['threadProperties']['meeting'] = json.loads(x['threadProperties']['meeting'])
x['record_type'] = 'meeting'
cleaned.append(x)
except (UnicodeDecodeError, KeyError):
print("Could not decode meeting.")
# Other types include Message, Chat, Space
print(conversation)
# Other types include Message, Chat, and Space; however, these did not contain any records of evidential value
# for my test data. It might be worthwhile to investigate them further with a different test scenario.

# Deduplicate
cleaned = deduplicate(cleaned, 'cachedDeduplicationKey')
return cleaned
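
Several Teams properties (call-log, links, files, threadProperties.meeting) store nested JSON as escaped strings, which is why the extra json.loads pass recurs throughout the parsers; a tiny illustration with invented values:

import json

# Hypothetical record: the nested meeting object arrives as a JSON string.
record = {'threadProperties': {'meeting': '{"subject": "Weekly sync"}'}}
meeting = json.loads(record['threadProperties']['meeting'])
print(meeting['subject'])  # Weekly sync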


def parse_records(records):
parsed_records = []

# Parse the records based on the store they are in. Some records, such as meetings, appear in multiple stores.
# The most appropriate one was used in this case.
# Parse the records based on the store they are in.

# parse contacts
contacts = [d for d in records if d['store'] == 'people']
@@ -185,8 +205,8 @@ def parse_cmdline():
description = 'Forensics.im Xtract Tool'
parser = argparse.ArgumentParser(description=description)
required_group = parser.add_argument_group('required arguments')
required_group.add_argument('--filepath', required=True, help='File path to the IndexedDB.')
required_group.add_argument('--outputpath', required=True, help='File path to the processed output.')
required_group.add_argument('-f', '--filepath', required=True, help='File path to the IndexedDB.')
required_group.add_argument('-o', '--outputpath', required=True, help='File path to the processed output.')
args = parser.parse_args()
return args

10 changes: 9 additions & 1 deletion utils/shared.py
@@ -4,7 +4,7 @@
from ccl_chrome_indexeddb import ccl_blink_value_deserializer, ccl_chromium_indexeddb, ccl_v8_value_deserializer, \
ccl_leveldb

# db ID (second byte) is constant, store ids vary based on what we are looking for
# db ID (second byte) is constant for the version analysed, store ids vary based on what we are looking for.
TEAMS_DB_PREFIX = {'replychains': b'\x00\x05\x02\x01', 'conversations': b'\x00\x05\x04\x01',
'people': b'\x00\x05\x07\x01'}
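
For orientation, these prefixes appear to follow Chromium's IndexedDB key layout: a header byte encoding the field lengths, followed by the database id, object store id, and index id. The breakdown below is an assumption based on that layout, not something stated in this repo:

prefix = b'\x00\x05\x04\x01'  # conversations
# Header byte 0x00 -> database id, store id and index id are one byte each (assumed layout).
db_id, store_id, index_id = prefix[1], prefix[2], prefix[3]
print(db_id, store_id, index_id)  # 5 4 1: db id is constant across stores, store ids differ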

Expand All @@ -13,6 +13,8 @@ def deserialize(db):
# Deserializer is adapted from the script below, but uses constant database and object store IDs rather than looping through the DBs forever.
# https://github.com/cclgroupltd/ccl_chrome_indexeddb/blob/master/ccl_chromium_indexeddb.py

# TODO refactor code to a subclass of RawLevelDB and override the iterator functionality

blink_deserializer = ccl_blink_value_deserializer.BlinkV8Deserializer()

deserialized_db = []
@@ -36,11 +38,15 @@ def deserialize(db):

val_idx += len(varint_raw)

# read the raw value of the record.
obj_raw = io.BytesIO(record.value[val_idx:])

# Initialize deserializer and try deserialization.
deserializer = ccl_v8_value_deserializer.Deserializer(
obj_raw, host_object_delegate=blink_deserializer.read)
try:
value = deserializer.read()
# TODO refactor code to a generator.
deserialized_db.append({'value': value, 'origin_file': record.origin_file, 'store': datastore})
except Exception as e:
pass
@@ -57,12 +63,14 @@ def write_results_to_json(data, outputpath):


def parse_db(filepath):
# Open raw access to a LevelDB and deserialize the records.
db = ccl_leveldb.RawLevelDb(filepath)
extracted_values = deserialize(db)
return extracted_values
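
End to end, the helpers compose as below; the LevelDB path is a hypothetical placeholder, since the actual location depends on the Teams profile being examined:

# Hypothetical usage of parse_db and write_results_to_json from this module.
records = parse_db('IndexedDB/https_teams.microsoft.com_0.indexeddb.leveldb')
write_results_to_json(records, 'teams.json')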


def parse_json():
# Read data from a file. This is only for testing purposes.
try:
with open('teams.json') as json_file:
data = json.load(json_file)
