From c2568d230d1cdb63904a1c11f8d71a53bca34e5a Mon Sep 17 00:00:00 2001 From: brandonzhu09 Date: Fri, 9 Feb 2024 16:41:13 -0500 Subject: [PATCH 1/2] revised input tool schema and tests --- .github/workflows/test_database.yaml | 16 ---- src/evagram/database/dataset.json | 6 +- src/evagram/database/input_tool.py | 86 ++++++++++-------- .../sql/proc_create_observation_variable.sql | 5 +- .../database/sql/proc_create_plots.sql | 74 ++++++++++----- .../sql/proc_create_variable_group.sql | 24 +++++ ...__effectiveerrordiff-vs-gsifinalerror.pkl} | Bin tests/test_input_tool.py | 72 +++++---------- 8 files changed, 154 insertions(+), 129 deletions(-) create mode 100644 src/evagram/database/sql/proc_create_variable_group.sql rename tests/eva/satwind/{windEastward_effectiveerrordiff-vs-gsifinalerror.pkl => windEastward__effectiveerrordiff-vs-gsifinalerror.pkl} (100%) diff --git a/.github/workflows/test_database.yaml b/.github/workflows/test_database.yaml index 15fbbf1..ed5015c 100644 --- a/.github/workflows/test_database.yaml +++ b/.github/workflows/test_database.yaml @@ -35,22 +35,6 @@ jobs: python -m pip install --upgrade pip pip install . -r requirements.txt - - name: Create Stored Procedures - env: - DB_HOST: ${{secrets.DB_HOST}} - DB_NAME: ${{secrets.DB_NAME}} - DB_USER: ${{secrets.DB_USER}} - DB_PASSWORD: ${{secrets.DB_PASSWORD}} - run: | - export PGPASSWORD=$DB_PASSWORD - psql -h $DB_HOST -U $DB_USER -d $DB_NAME -a -f ./src/evagram/database/sql/proc_create_owners.sql - psql -h $DB_HOST -U $DB_USER -d $DB_NAME -a -f ./src/evagram/database/sql/proc_create_experiments.sql - psql -h $DB_HOST -U $DB_USER -d $DB_NAME -a -f ./src/evagram/database/sql/proc_create_groups.sql - psql -h $DB_HOST -U $DB_USER -d $DB_NAME -a -f ./src/evagram/database/sql/proc_create_plots.sql - psql -h $DB_HOST -U $DB_USER -d $DB_NAME -a -f ./src/evagram/database/sql/proc_create_variables.sql - psql -h $DB_HOST -U $DB_USER -d $DB_NAME -a -f ./src/evagram/database/sql/proc_create_observations.sql - psql -h $DB_HOST -U $DB_USER -d $DB_NAME -a -f ./src/evagram/database/sql/proc_create_observation_variable.sql - - name: Run Input Tool env: DB_HOST: ${{secrets.DB_HOST}} diff --git a/src/evagram/database/dataset.json b/src/evagram/database/dataset.json index 5456a75..455c2a2 100644 --- a/src/evagram/database/dataset.json +++ b/src/evagram/database/dataset.json @@ -66,7 +66,7 @@ { "plot_id": 322, "plot_file": "brightnessTemperature_10_hofx-vs-gsihofxbc.pkl", - "experiment_id": 3 + "experiment_id": 12 }, { "plot_id": 323, @@ -76,7 +76,7 @@ { "plot_id": 114, "plot_file": "brightnessTemperature_8_effectiveerror-vs-gsifinalerror.pkl", - "experiment_id": 1 + "experiment_id": 3 }, { "plot_id": 165, @@ -85,7 +85,7 @@ }, { "plot_id": 966, - "plot_file": "windEastward_effectiveerrordiff-vs-gsifinalerror.pkl", + "plot_file": "windEastward__effectiveerrordiff-vs-gsifinalerror.pkl", "experiment_id": 96 } ], diff --git a/src/evagram/database/input_tool.py b/src/evagram/database/input_tool.py index 8d04e69..b0b60d5 100644 --- a/src/evagram/database/input_tool.py +++ b/src/evagram/database/input_tool.py @@ -2,6 +2,8 @@ import json import os import sys +import argparse +from pathlib import Path import psycopg2 from dotenv import load_dotenv @@ -14,9 +16,47 @@ db_user = os.environ.get('DB_USER') db_password = os.environ.get('DB_PASSWORD') -# can be modified to the file path of experiment data -EXPERIMENT_DATA_PATH = './tests/eva/' +# default path configurations +EXPERIMENT_DATA_PATH = './tests/eva' DATASET_PATH = './src/evagram/database/' +PROCEDURES_PATH = './src/evagram/database/sql' + + +def main(args): + global EXPERIMENT_DATA_PATH + parser = argparse.ArgumentParser() + parser.add_argument("experiment_path") + args = parser.parse_args(args) + experiment_path = Path(args.experiment_path) + if experiment_path.exists(): + EXPERIMENT_DATA_PATH = args.experiment_path + + conn = psycopg2.connect( + host=db_host, + port=db_port, + dbname=db_name, + user=db_user, + password=db_password + ) + cur = conn.cursor() + + create_procedures(cur) + drop_tables(cur) + create_tables(cur) + load_dataset_to_db(cur) + + conn.commit() + cur.close() + conn.close() + + +def create_procedures(cur): + for proc in os.listdir(PROCEDURES_PATH): + proc_file = os.path.join(PROCEDURES_PATH, proc) + if os.path.isfile(proc_file) and proc.startswith("proc_") and proc.endswith(".sql"): + contents = open(proc_file, 'r') + cur.execute(contents.read()) + contents.close() def create_tables(cur): @@ -32,8 +72,6 @@ def create_tables(cur): cur.execute("CALL public.create_observations();") # Plots table cur.execute("CALL public.create_plots();") - # Observation Variable join table - cur.execute("CALL public.create_observation_variable();") def drop_tables(cur): @@ -43,7 +81,6 @@ def drop_tables(cur): cur.execute("DROP TABLE IF EXISTS groups CASCADE") cur.execute("DROP TABLE IF EXISTS observations CASCADE") cur.execute("DROP TABLE IF EXISTS variables CASCADE") - cur.execute("DROP TABLE IF EXISTS observation_variable CASCADE") def load_dataset_to_db(cur): @@ -138,13 +175,8 @@ def add_plot(cur, plot_obj, observation_dirs): plot_components = filename_no_extension.split("_") var_name = plot_components[0] - # parse channel fields for brightnessTemperature observations - if var_name == "brightnessTemperature": - channel = plot_components[1] - group_name = plot_components[2] - else: - channel = None - group_name = plot_components[1] + channel = plot_components[1] if plot_components[1] != '' else None + group_name = plot_components[2] # insert observation, variable, group dynamically if not exist in database cur.execute("SELECT observation_id FROM observations WHERE observation_name=%s", @@ -194,39 +226,13 @@ def add_plot(cur, plot_obj, observation_dirs): plot_obj["script"] = script plot_obj["observation_id"] = observation_id plot_obj["group_id"] = group_id + plot_obj["variable_id"] = variable_id # insert plot to database insert_table_record(cur, plot_obj, "plots") - # create relationship between observation and variable in join table - # only if at least one of them was just inserted - if new_observation or new_variable: - observation_variable_obj = { - "observation_id": observation_id, - "variable_id": variable_id - } - insert_table_record(cur, observation_variable_obj, "observation_variable") - return 0 if __name__ == "__main__": - if len(sys.argv) > 1: - EXPERIMENT_DATA_PATH = sys.argv[1] - - conn = psycopg2.connect( - host=db_host, - port=db_port, - dbname=db_name, - user=db_user, - password=db_password - ) - cur = conn.cursor() - - drop_tables(cur) - create_tables(cur) - load_dataset_to_db(cur) - - conn.commit() - cur.close() - conn.close() + main(sys.argv[1:]) diff --git a/src/evagram/database/sql/proc_create_observation_variable.sql b/src/evagram/database/sql/proc_create_observation_variable.sql index cbb5c29..59f6c2e 100644 --- a/src/evagram/database/sql/proc_create_observation_variable.sql +++ b/src/evagram/database/sql/proc_create_observation_variable.sql @@ -7,9 +7,9 @@ CREATE OR REPLACE PROCEDURE public.create_observation_variable( LANGUAGE 'sql' AS $BODY$ CREATE TABLE observation_variable ( + observation_variable_id SERIAL PRIMARY KEY, observation_id INTEGER NOT NULL, variable_id INTEGER NOT NULL, - CONSTRAINT observation_variable_id PRIMARY KEY (observation_id, variable_id), CONSTRAINT fk_observation FOREIGN KEY (observation_id) REFERENCES observations(observation_id) @@ -17,7 +17,8 @@ CREATE TABLE observation_variable ( CONSTRAINT fk_variable FOREIGN KEY (variable_id) REFERENCES variables(variable_id) - ON DELETE CASCADE + ON DELETE CASCADE, + UNIQUE(observation_id, variable_id) ); $BODY$; ALTER PROCEDURE public.create_observation_variable() diff --git a/src/evagram/database/sql/proc_create_plots.sql b/src/evagram/database/sql/proc_create_plots.sql index 327318a..a4d2d44 100644 --- a/src/evagram/database/sql/proc_create_plots.sql +++ b/src/evagram/database/sql/proc_create_plots.sql @@ -6,27 +6,59 @@ CREATE OR REPLACE PROCEDURE public.create_plots( ) LANGUAGE 'sql' AS $BODY$ -CREATE TABLE IF NOT EXISTS plots ( - plot_id serial PRIMARY KEY, - div VARCHAR, - script VARCHAR, - experiment_id INTEGER NOT NULL, - group_id INTEGER NOT NULL, - observation_id INTEGER NOT NULL, - CONSTRAINT fk_experiment - FOREIGN KEY (experiment_id) - REFERENCES experiments(experiment_id) - ON DELETE CASCADE, - CONSTRAINT fk_group - FOREIGN KEY (group_id) - REFERENCES groups(group_id) - ON DELETE CASCADE, - CONSTRAINT fk_observation - FOREIGN KEY (observation_id) - REFERENCES observations(observation_id) - ON DELETE CASCADE, - UNIQUE(experiment_id, group_id, observation_id) -); +CREATE TABLE IF NOT EXISTS public.plots +( + plot_id integer NOT NULL GENERATED BY DEFAULT AS IDENTITY ( INCREMENT 1 START 1 MINVALUE 1 MAXVALUE 2147483647 CACHE 1 ), + div character varying COLLATE pg_catalog."default", + script character varying COLLATE pg_catalog."default", + experiment_id integer NOT NULL, + group_id integer NOT NULL, + observation_id integer NOT NULL, + variable_id integer NOT NULL, + CONSTRAINT plots_pkey PRIMARY KEY (plot_id), + CONSTRAINT plots_experiment_id_group_id_o_78d8b74e_uniq UNIQUE (experiment_id, group_id, observation_id, variable_id), + CONSTRAINT plots_experiment_id_79cb5574_fk_experiments_experiment_id FOREIGN KEY (experiment_id) + REFERENCES public.experiments (experiment_id) + ON DELETE CASCADE, + CONSTRAINT plots_group_id_85eaf2d2_fk_groups_group_id FOREIGN KEY (group_id) + REFERENCES public.groups (group_id) + ON DELETE CASCADE, + CONSTRAINT plots_observation_id_26813e98_fk_observations_observation_id FOREIGN KEY (observation_id) + REFERENCES public.observations (observation_id) + ON DELETE CASCADE, + CONSTRAINT plots_variable_id_394e0899_fk_variables_variable_id FOREIGN KEY (variable_id) + REFERENCES public.variables (variable_id) + ON DELETE CASCADE +) + + + +-- CREATE TABLE IF NOT EXISTS plots ( +-- plot_id serial PRIMARY KEY, +-- div VARCHAR, +-- script VARCHAR, +-- experiment_id INTEGER NOT NULL, +-- group_id INTEGER NOT NULL, +-- observation_id INTEGER NOT NULL, +-- variable_id INTEGER NOT NULL, +-- CONSTRAINT fk_experiment +-- FOREIGN KEY (experiment_id) +-- REFERENCES experiments(experiment_id) +-- ON DELETE CASCADE, +-- CONSTRAINT fk_group +-- FOREIGN KEY (group_id) +-- REFERENCES groups(group_id) +-- ON DELETE CASCADE, +-- CONSTRAINT fk_observation +-- FOREIGN KEY (observation_id) +-- REFERENCES observation_variable(observation_id) +-- ON DELETE CASCADE, +-- CONSTRAINT fk_variable +-- FOREIGN KEY (variable_id) +-- REFERENCES variables(variable_id) +-- ON DELETE CASCADE, +-- UNIQUE(experiment_id, group_id, observation_id, variable_id) + $BODY$; ALTER PROCEDURE public.create_plots() OWNER TO postgres; diff --git a/src/evagram/database/sql/proc_create_variable_group.sql b/src/evagram/database/sql/proc_create_variable_group.sql new file mode 100644 index 0000000..e10b480 --- /dev/null +++ b/src/evagram/database/sql/proc_create_variable_group.sql @@ -0,0 +1,24 @@ +-- PROCEDURE: public.create_variable_group() + +-- DROP PROCEDURE IF EXISTS public.create_variable_group(); + +CREATE OR REPLACE PROCEDURE public.create_variable_group( + ) +LANGUAGE 'sql' +AS $BODY$ +CREATE TABLE IF NOT EXISTS variable_group +( + variable_group_id serial PRIMARY KEY, + variable_id INT NOT NULL, + group_id INT NOT NULL, + CONSTRAINT fk_group FOREIGN KEY (group_id) + REFERENCES groups (group_id) + ON DELETE CASCADE, + CONSTRAINT fk_variable FOREIGN KEY (variable_id) + REFERENCES variables (variable_id) + ON DELETE CASCADE, + UNIQUE (variable_id, group_id) +) +$BODY$; +ALTER PROCEDURE public.create_variable_group() + OWNER TO postgres; diff --git a/tests/eva/satwind/windEastward_effectiveerrordiff-vs-gsifinalerror.pkl b/tests/eva/satwind/windEastward__effectiveerrordiff-vs-gsifinalerror.pkl similarity index 100% rename from tests/eva/satwind/windEastward_effectiveerrordiff-vs-gsifinalerror.pkl rename to tests/eva/satwind/windEastward__effectiveerrordiff-vs-gsifinalerror.pkl diff --git a/tests/test_input_tool.py b/tests/test_input_tool.py index bff5f67..bae0c06 100644 --- a/tests/test_input_tool.py +++ b/tests/test_input_tool.py @@ -26,6 +26,7 @@ class TestDatabaseInputTool(unittest.TestCase): def setUp(self): self.cur = conn.cursor() + input_tool.main(['tests/eva']) self.cur.execute( """SELECT setval('owners_owner_id_seq', @@ -116,12 +117,14 @@ def test_InsertExperimentWithSameNameAndOwner(self): } input_tool.insert_table_record(self.cur, experiment_obj, "experiments") - def test_DeleteExperimentAndPlots(self): + def test_DeleteExperimentCascades(self): self.cur.execute( "DELETE FROM experiments WHERE experiment_id=%s", (12,)) + # find any instance of experiment in 'experiments' self.cur.execute( "SELECT (experiment_id) FROM experiments WHERE experiment_id=%s", (12,)) assert len(self.cur.fetchall()) == 0 + # find any instance of experiment in 'plots' self.cur.execute( "SELECT (plot_id) FROM plots WHERE experiment_id=%s", (12,)) assert len(self.cur.fetchall()) == 0 @@ -131,14 +134,15 @@ def test_InsertPlotExpected(self): "plot_id": 115, "experiment_id": 1, "group_id": 3, - "observation_id": 2 + "observation_id": 1, + "variable_id": 2 } input_tool.insert_table_record(self.cur, plot_obj, "plots") self.cur.execute( "SELECT (plot_id) FROM plots WHERE plot_id=%s", (115,)) assert len(self.cur.fetchall()) == 1 - def test_InsertPlotWithoutExperimentGroupOrObservation(self): + def test_InsertPlotMissingFields(self): with self.assertRaises(psycopg2.errors.NotNullViolation): plot_obj = { "group_id": 1, @@ -162,11 +166,12 @@ def test_InsertPlotWithoutExperimentGroupOrObservation(self): } input_tool.insert_table_record(self.cur, plot_obj, "plots") - def test_InsertPlotWithExperimentGroupOrObservationNotFound(self): + def test_InsertPlotInvalidFields(self): plot_obj = { "experiment_id": 12, "group_id": 1, - "observation_id": 1 + "observation_id": 1, + "variable_id": 1 } with self.assertRaises(psycopg2.errors.ForeignKeyViolation): plot_obj["experiment_id"] = -12 @@ -183,6 +188,12 @@ def test_InsertPlotWithExperimentGroupOrObservationNotFound(self): plot_obj["group_id"] = 1 plot_obj["observation_id"] = -1 input_tool.insert_table_record(self.cur, plot_obj, "plots") + self.tearDown() + self.setUp() + with self.assertRaises(psycopg2.errors.ForeignKeyViolation): + plot_obj["observation_id"] = 1 + plot_obj["variable_id"] = -1 + input_tool.insert_table_record(self.cur, plot_obj, "plots") def test_InsertObservationExpected(self): observation_obj = { @@ -195,34 +206,6 @@ def test_InsertObservationExpected(self): ("satwind",)) assert len(self.cur.fetchall()) == 1 - def test_InsertNewObservationToExistingVariable(self): - observation_obj = { - "observation_name": "amsua_n19" - } - input_tool.insert_table_record(self.cur, observation_obj, "observations") - # get existing variable and its variable_id - self.cur.execute("SELECT variable_id FROM variables WHERE variable_name=%s AND channel=%s", - ("brightnessTemperature", 4)) - variable_id = self.cur.fetchone()[0] - - # get inserted observation and its observation_id - self.cur.execute("SELECT observation_id FROM observations WHERE observation_name=%s", - ("amsua_n19",)) - observation_id = self.cur.fetchone()[0] - - # establish relationship between observation and variable in junction table - observation_variable_obj = { - "observation_id": observation_id, - "variable_id": variable_id - } - input_tool.insert_table_record(self.cur, observation_variable_obj, "observation_variable") - - # check if variable exists in observation - self.cur.execute("SELECT variable_id FROM observation_variable WHERE observation_id=%s", - (observation_id,)) - variables = self.cur.fetchall() - self.assertTrue((variable_id,) in variables) - def test_InsertObservationWithSameName(self): with self.assertRaises(psycopg2.errors.UniqueViolation): observation_obj = { @@ -230,34 +213,29 @@ def test_InsertObservationWithSameName(self): } input_tool.insert_table_record(self.cur, observation_obj, "observations") - def test_InsertExistingObservationToVariableTwice(self): - with self.assertRaises(psycopg2.errors.UniqueViolation): - observation_variable_obj = { - "observation_id": 1, - "variable_id": 1 - } - input_tool.insert_table_record( - self.cur, observation_variable_obj, "observation_variable") - def test_FetchExistingPlots(self): - # get all amsua_n18 plots in experiment "experiment_iv_2" where the user is asewnath + # get all amsua_n18 plots in experiment "experiment_iv_1" where the user is thamzey self.cur.execute("""SELECT plot_id, plots.experiment_id FROM plots JOIN experiments ON plots.experiment_id = experiments.experiment_id JOIN observations ON plots.observation_id = observations.observation_id + JOIN variables ON plots.variable_id = variables.variable_id JOIN owners ON owners.owner_id = experiments.owner_id WHERE experiments.experiment_name = %s AND owners.username = %s AND observations.observation_name = %s; """, - ("experiment_iv_2", "asewnath", "amsua_n18")) + ("experiment_iv_1", "thamzey", "amsua_n18")) plots = self.cur.fetchall() - self.assertTrue(len(plots) == 1) - # checks if plot with plot_id=114 and experiment_id=1 was found - self.assertTrue((114, 1) in plots) + self.assertTrue(len(plots) == 2) + # checks if plot with plot_id=114 and experiment_id=3 + # and plot_id=323 and experiment_id=3 was found + self.assertTrue((114, 3) in plots) + self.assertTrue((323, 3) in plots) def test_FetchNonExistingPlots(self): # get all satwind plots in experiment "experiment_iv_1" self.cur.execute("""SELECT plot_id FROM plots JOIN experiments ON plots.experiment_id = experiments.experiment_id JOIN observations ON plots.observation_id = observations.observation_id + JOIN variables ON plots.variable_id = variables.variable_id WHERE experiments.experiment_name = %s AND observations.observation_name = %s;""", ("experiment_iv_1", "satwind")) From a6c556461ad2b31c895ccf849f323c5b13b98b3e Mon Sep 17 00:00:00 2001 From: brandonzhu09 Date: Fri, 9 Feb 2024 16:42:18 -0500 Subject: [PATCH 2/2] removed redundant junction tables --- .../sql/proc_create_observation_variable.sql | 25 ------------------- .../sql/proc_create_variable_group.sql | 24 ------------------ 2 files changed, 49 deletions(-) delete mode 100644 src/evagram/database/sql/proc_create_observation_variable.sql delete mode 100644 src/evagram/database/sql/proc_create_variable_group.sql diff --git a/src/evagram/database/sql/proc_create_observation_variable.sql b/src/evagram/database/sql/proc_create_observation_variable.sql deleted file mode 100644 index 59f6c2e..0000000 --- a/src/evagram/database/sql/proc_create_observation_variable.sql +++ /dev/null @@ -1,25 +0,0 @@ --- PROCEDURE: public.create_observation_variable() - --- DROP PROCEDURE IF EXISTS public.create_observation_variable(); - -CREATE OR REPLACE PROCEDURE public.create_observation_variable( - ) -LANGUAGE 'sql' -AS $BODY$ -CREATE TABLE observation_variable ( - observation_variable_id SERIAL PRIMARY KEY, - observation_id INTEGER NOT NULL, - variable_id INTEGER NOT NULL, - CONSTRAINT fk_observation - FOREIGN KEY (observation_id) - REFERENCES observations(observation_id) - ON DELETE CASCADE, - CONSTRAINT fk_variable - FOREIGN KEY (variable_id) - REFERENCES variables(variable_id) - ON DELETE CASCADE, - UNIQUE(observation_id, variable_id) -); -$BODY$; -ALTER PROCEDURE public.create_observation_variable() - OWNER TO postgres; diff --git a/src/evagram/database/sql/proc_create_variable_group.sql b/src/evagram/database/sql/proc_create_variable_group.sql deleted file mode 100644 index e10b480..0000000 --- a/src/evagram/database/sql/proc_create_variable_group.sql +++ /dev/null @@ -1,24 +0,0 @@ --- PROCEDURE: public.create_variable_group() - --- DROP PROCEDURE IF EXISTS public.create_variable_group(); - -CREATE OR REPLACE PROCEDURE public.create_variable_group( - ) -LANGUAGE 'sql' -AS $BODY$ -CREATE TABLE IF NOT EXISTS variable_group -( - variable_group_id serial PRIMARY KEY, - variable_id INT NOT NULL, - group_id INT NOT NULL, - CONSTRAINT fk_group FOREIGN KEY (group_id) - REFERENCES groups (group_id) - ON DELETE CASCADE, - CONSTRAINT fk_variable FOREIGN KEY (variable_id) - REFERENCES variables (variable_id) - ON DELETE CASCADE, - UNIQUE (variable_id, group_id) -) -$BODY$; -ALTER PROCEDURE public.create_variable_group() - OWNER TO postgres;