diff --git a/bin/historical/migrations/_common.py b/bin/historical/migrations/_common.py new file mode 100644 index 000000000..a562c0ee1 --- /dev/null +++ b/bin/historical/migrations/_common.py @@ -0,0 +1,42 @@ +import os +import subprocess +import importlib +import logging +import tempfile +import time + +import emission.core.get_database as edb + +DB_HOST_TEMPLATE = os.environ.get('DB_HOST_TEMPLATE', "mongodb://localhost:27017/openpath_prod_REPLACEME") + +if 'PROD_LIST' in os.environ: + PROD_LIST=os.environ['PROD_LIST'].split(",") +else: + with tempfile.TemporaryDirectory() as tmpdirname: + print(f"created {tmpdirname=} to find list of configs") + os.chdir(tmpdirname) + proc = subprocess.run( + f"git clone https://github.com/e-mission/nrel-openpath-deploy-configs.git", shell=True) + filenames = os.listdir(f"nrel-openpath-deploy-configs/configs/") + + PROD_LIST = [ + fname.split(".")[0] + for fname in filenames + if fname and 'dev-' not in fname and 'stage-' not in fname + ] +print(f"PROD_LIST: {PROD_LIST}") + +def run_on_all_deployments(fn_to_run): + """ + Run the given function on the database for each deployment by setting the + DB_HOST environment variable in between each function call. + The list of deployments (PROD_LIST) is retrieved from the + nrel-openpath-deploy-configs repo upon initialization of this module. + """ + for prod in PROD_LIST: + prod_db_name = prod.replace("-", "_") + print(f"Running {fn_to_run.__name__} for {prod} on DB {prod_db_name}") + os.environ['DB_HOST'] = DB_HOST_TEMPLATE.replace( + "REPLACEME", prod_db_name) + importlib.reload(edb) + fn_to_run() diff --git a/bin/historical/migrations/all_deployments_template.py b/bin/historical/migrations/all_deployments_template.py new file mode 100644 index 000000000..202e26f6e --- /dev/null +++ b/bin/historical/migrations/all_deployments_template.py @@ -0,0 +1,8 @@ +import emission.core.get_database as edb + +from _common import run_on_all_deployments + +def print_connect_url(): + print("Connecting to database URL"+edb.url) + +run_on_all_deployments(print_connect_url) diff --git a/bin/historical/migrations/trim_fluff_from_composite_trips.py b/bin/historical/migrations/trim_fluff_from_composite_trips.py new file mode 100644 index 000000000..86f6d790a --- /dev/null +++ b/bin/historical/migrations/trim_fluff_from_composite_trips.py @@ -0,0 +1,52 @@ +import emission.core.get_database as edb + +from _common import run_on_all_deployments + + +def trim_fluff_from_composite_trips(): + """ + Trim unnecessary fields from composite trips in the analysis_timeseries_db. + The shape of the remaining fields is unchanged. + """ + print("Trimming fluff from composite trips") + analysis_ts = edb.get_analysis_timeseries_db() + for ct in analysis_ts.find({'metadata.key': 'analysis/composite_trip'}): + # print(f"Trimming {ct['_id']}, {ct['data'].get('start_ts')} - {ct['data'].get('end_ts')}") + for l in ct['data'].get('locations', []): + trim_entry(l, { + 'metadata': [], + 'data': ['loc', 'ts'], + }) + + for s in ct['data'].get('sections', []): + trim_entry(s, { + 'metadata': [], + 'data': ['start_ts', 'end_ts', 'sensed_mode', 'sensed_mode_str', + 'ble_sensed_mode', 'distance', 'duration'], + }) + + for key in ['start_confirmed_place', 'end_confirmed_place']: + trim_entry(ct['data'].get(key), { + '_id': True, + 'metadata': ['key'], + 'data': ['enter_ts', 'exit_ts', 'location', 'duration', + 'user_input', 'additions'], + }) + + analysis_ts.update_one( + {'_id': ct['_id']}, + {'$set': {'data': ct['data']}} + ) + + +def trim_entry(entry, fields_to_keep): + if entry is None: + return + for key in list(entry): + if key not in fields_to_keep: + del entry[key] + elif isinstance(entry[key], dict) and isinstance(fields_to_keep, dict): + trim_entry(entry[key], fields_to_keep[key]) + + +run_on_all_deployments(trim_fluff_from_composite_trips)