From 892c7a167ded815614e2dd6c3b3f3cf7f8de5695 Mon Sep 17 00:00:00 2001 From: John Sharples <41682323+John-Sharples@users.noreply.github.com> Date: Tue, 5 Nov 2024 03:18:52 +1100 Subject: [PATCH] 318: add tests for duplicate loads and errors (#350) --- METdbLoad/test/test_met_db_load.py | 329 +++++++++++++++++++++++++++-- METdbLoad/ush/met_db_load.py | 13 +- 2 files changed, 321 insertions(+), 21 deletions(-) diff --git a/METdbLoad/test/test_met_db_load.py b/METdbLoad/test/test_met_db_load.py index a4c3727c..c4712f7d 100644 --- a/METdbLoad/test/test_met_db_load.py +++ b/METdbLoad/test/test_met_db_load.py @@ -1,6 +1,8 @@ import pytest -from unittest.mock import patch +import sys +from unittest.mock import patch, MagicMock from METdbLoad.ush.met_db_load import main as load_main +from METdbLoad.ush.met_db_load import purge_files, parse_args, next_set, print_version from METdbLoad.ush.run_sql import RunSql from METdbLoad.test.utils import dict_to_args @@ -109,9 +111,9 @@ def assert_count_rows(cur, table, expected_count): ( RHIST_DATA_DIR, "ensemble_stat", - { + { "line_data_rhist": 2, - "line_data_rhist_rank":22, + "line_data_rhist_rank": 22, "line_data_phist": 2, "line_data_phist_bin": 40, "line_data_ecnt": 2, @@ -137,7 +139,11 @@ def test_met_db_table_counts( test_args = dict_to_args( { - "xmlfile": str(get_xml_test_file(tmp_path, met_data_dir, met_tool, load_flags=load_flags)), + "xmlfile": str( + get_xml_test_file( + tmp_path, met_data_dir, met_tool, load_flags=load_flags + ) + ), "index": "true", "tmpdir": [str(tmp_path)], "loglevel": None, @@ -150,6 +156,148 @@ def test_met_db_table_counts( assert_count_rows(testRunSql.cur, table, expected_count) +@pytest.mark.parametrize( + "met_data_dir, met_tool, expected_counts", + [ + ( + GRID_STAT_DATA_DIR, + "grid_stat", + { + "line_data_eclv": 9, + "line_data_fho": 9, + "line_data_eclv_pnt": 171, + "line_data_cts": 9, + "line_data_ctc": 9, + "line_data_cnt": 3, + }, + ), + ( + MTD_DATA_DIR, + "mtd", + { + "mtd_2d_obj": 278, + "mtd_3d_obj_single": 8, + "mtd_3d_obj_pair": 4, + "mtd_header": 24, + }, + ), + ( + MODE_DATA_DIR, + "mtd", + { + "mode_cts": 2, + "mode_obj_pair": 5, + "mode_obj_single": 6, + }, + ), + ], +) +def test_met_db_table_dups( + emptyDB, + testRunSql, + tmp_path, + met_data_dir, + met_tool, + expected_counts, +): + + load_flags = { + "mode_header_db_check": "true", + "mtd_header_db_check": "true", + "force_dup_file": "false", + } + test_args = dict_to_args( + { + "xmlfile": str( + get_xml_test_file( + tmp_path, met_data_dir, met_tool, load_flags=load_flags + ) + ), + "index": "true", + "tmpdir": [str(tmp_path)], + "loglevel": None, + } + ) + + load_main(test_args) + # load again to check duplicates aren't loaded in db + load_main(test_args) + + for table, expected_count in expected_counts.items(): + assert_count_rows(testRunSql.cur, table, expected_count) + + +@pytest.mark.parametrize( + "met_data_dir, met_tool, expected_counts", + [ + ( + GRID_STAT_DATA_DIR, + "grid_stat", + { + "line_data_eclv": 9, + "line_data_fho": 9, + "line_data_eclv_pnt": 171, + "line_data_cts": 9, + "line_data_ctc": 9, + "line_data_cnt": 3, + }, + ), + ( + MTD_DATA_DIR, + "mtd", + { + "mtd_2d_obj": 278, + "mtd_3d_obj_single": 8, + "mtd_3d_obj_pair": 4, + "mtd_header": 24 / 2, # header not duplicated + }, + ), + ( + MODE_DATA_DIR, + "mtd", + { + "mode_cts": 2, + "mode_obj_pair": 5, + "mode_obj_single": 6, + }, + ), + ], +) +def test_met_db_table_dups_allowed( + emptyDB, + testRunSql, + tmp_path, + met_data_dir, + met_tool, + expected_counts, +): + + load_flags = { + "mode_header_db_check": "true", + "mtd_header_db_check": "true", + "force_dup_file": "true", + } + test_args = dict_to_args( + { + "xmlfile": str( + get_xml_test_file( + tmp_path, met_data_dir, met_tool, load_flags=load_flags + ) + ), + "index": "true", + "tmpdir": [str(tmp_path)], + "loglevel": None, + } + ) + + load_main(test_args) + # load again to add duplicates + load_main(test_args) + + for table, expected_count in expected_counts.items(): + assert_count_rows(testRunSql.cur, table, expected_count * 2) + + def test_met_db_indexes( emptyDB, testRunSql, @@ -168,7 +316,7 @@ def test_met_db_indexes( ), "index": "false", "tmpdir": [str(tmp_path)], - "loglevel": None + "loglevel": None, } ) @@ -192,9 +340,10 @@ def test_met_db_indexes( with patch.object(RunSql, "apply_indexes", side_effect=KeyError): load_main(test_args) + @pytest.mark.parametrize( - "met_data_dir, met_tool, expected_counts, local_infile", - [ + "met_data_dir, met_tool, expected_counts, local_infile", + [ ( POINT_STAT_DATA_DIR, "point_stat", @@ -206,7 +355,7 @@ def test_met_db_indexes( "line_data_cnt": 10, "line_data_vl1l2": 1, }, - 'false', + "false", ), ( POINT_STAT_DATA_DIR, @@ -219,7 +368,7 @@ def test_met_db_indexes( "line_data_cnt": 10, "line_data_vl1l2": 1, }, - 'true', + "true", ), ( MTD_DATA_DIR, @@ -228,25 +377,31 @@ def test_met_db_indexes( "mtd_2d_obj": 278, "mtd_3d_obj_single": 8, }, - 'false', + "false", ), ( - MTD_DATA_DIR, + MTD_DATA_DIR, "mtd", { "mtd_2d_obj": 278, "mtd_3d_obj_single": 8, }, - 'true', + "true", ), - ], + ], ) -def test_local_in_file(emptyDB, testRunSql, tmp_path, met_data_dir, met_tool, expected_counts, local_infile): +def test_local_in_file( + emptyDB, testRunSql, tmp_path, met_data_dir, met_tool, expected_counts, local_infile +): """check we get the same result when local_file is on or off""" test_args = dict_to_args( { - "xmlfile": str(get_xml_test_file(tmp_path, met_data_dir, met_tool, local_infile=local_infile)), + "xmlfile": str( + get_xml_test_file( + tmp_path, met_data_dir, met_tool, local_infile=local_infile + ) + ), "index": "false", "tmpdir": [str(tmp_path)], "loglevel": None, @@ -257,3 +412,147 @@ def test_local_in_file(emptyDB, testRunSql, tmp_path, met_data_dir, met_tool, ex for table, expected_count in expected_counts.items(): assert_count_rows(testRunSql.cur, table, expected_count) + + +def test_empty_files(tmp_path): + """Junk files shouldn't cause an error when running load_main""" + + met_data_dir = tmp_path / "empty_files_test" + met_data_dir.mkdir() + + open(met_data_dir / "mtd_empty_2d.txt", "a").close() + with open(met_data_dir / "mtd_bad_header_3d.txt", "w") as f: + f.write("SOME HEADER INFO\n") + + with open(met_data_dir / "mtd_good_header_bad_data_3d.txt", "w") as f: + f.write( + "VERSION MODEL DESC FCST_LEAD FCST_VALID OBS_LEAD OBS_VALID T_DELTA FCST_T_BEG FCST_T_END FCST_RAD FCST_THR OBS_T_BEG OBS_T_END OBS_RAD OBS_THR FCST_VAR FCST_UNITS FCST_LEV OBS_VAR OBS_UNITS OBS_LEV OBJECT_ID OBJECT_CAT SPACE_CENTROID_DIST TIME_CENTROID_DELTA AXIS_DIFF SPEED_DELTA DIRECTION_DIFF VOLUME_RATIO START_TIME_DELTA END_TIME_DELTA INTERSECTION_VOLUME DURATION_DIFF INTEREST\n 1 2 3 xxx" + ) + + with open(met_data_dir / "grid_stat_header_only.stat", "w") as f: + f.write( + "VERSION MODEL DESC FCST_LEAD FCST_VALID_BEG FCST_VALID_END OBS_LEAD OBS_VALID_BEG OBS_VALID_END FCST_VAR FCST_UNITS FCST_LEV OBS_VAR OBS_UNITS OBS_LEV OBTYPE VX_MASK INTERP_MTHD INTERP_PNTS FCST_THRESH OBS_THRESH COV_THRESH ALPHA LINE_TYPE\n" + ) + + test_args = dict_to_args( + { + "xmlfile": str( + get_xml_test_file( + tmp_path, + met_data_dir, + "mtd", + ) + ), + "index": "false", + "tmpdir": [str(tmp_path)], + "loglevel": None, + } + ) + + load_main(test_args) + + +def test_print_version(): + mock_logger = MagicMock() + print_version(mock_logger) + assert mock_logger.info.called_once + assert mock_logger.info.call_args[0][0].startswith("METdbload Version:") + + with pytest.raises(SystemExit): + with patch("os.path.dirname", side_effect=TypeError): + print_version(mock_logger) + assert mock_logger.error.call_count == 2 + + +@pytest.mark.parametrize( + "mid,last,expected", + [ + (99, 100, (100, 100, 100)), + (99, 347, (100, 200, 347)), + (99, 147, (100, 147, 147)), + ], +) +def test_next_set(mid, last, expected): + assert next_set(mid, last) == expected + + +@pytest.mark.parametrize( + "xml_flags,expected", + [ + ( + { + "load_stat": False, + "load_mode": False, + "load_mtd": False, + }, + [], + ), + ( + { + "load_stat": True, + "load_mode": False, + "load_mtd": False, + }, + ["test.stat", "test.vsdb"], + ), + ( + { + "load_stat": False, + "load_mode": True, + "load_mtd": True, + }, + [ + "test_cts.txt", + "test_obj.txt", + "test_2d.txt", + "test_3d_s.txt", + "test_3d_p.txt", + ], + ), + ], +) +def test_purge_files(xml_flags, expected): + load_files = [ + "test.stat", + "test.vsdb", + "test_cts.txt", + "test_obj.txt", + "test_2d.txt", + "test_3d_s.txt", + "test_3d_p.txt", + ] + + mock_logger = MagicMock() + + actual = purge_files(load_files, xml_flags, mock_logger) + assert expected == actual + + +def test_purge_files_raises(): + mock_logger = MagicMock() + with pytest.raises(SystemExit): + purge_files([], {}, mock_logger) + assert mock_logger.error.call_count == 2 + + +def test_parse_args(): + good_args = [ + "met_db_load.py", + "-index", + "test/test_load_specification.xml", + "--loglevel", + "ERROR", + ] + bad_args = ["met_db_load.py", "-f", "test/test_load_specification.xml"] + + # should run without error + with patch.object(sys, "argv", good_args): + args = parse_args() + assert args.index + assert args.xmlfile == "test/test_load_specification.xml" + assert args.loglevel == "ERROR" + + # produces error + with pytest.raises(SystemExit): + with patch.object(sys, "argv", bad_args): + parse_args() diff --git a/METdbLoad/ush/met_db_load.py b/METdbLoad/ush/met_db_load.py index 52113913..5524226d 100644 --- a/METdbLoad/ush/met_db_load.py +++ b/METdbLoad/ush/met_db_load.py @@ -427,7 +427,7 @@ def purge_files(load_files, xml_flags, logger): "3d_s" in item.lower() or "3d_p" in item.lower())] - except (RuntimeError, TypeError, NameError, KeyError): + except (RuntimeError, TypeError, NameError, KeyError) as e: logger.error("*** %s occurred in purge_files ***", sys.exc_info()[0]) logger.error( "*** %s occurred in Main purging files not selected ***", sys.exc_info()[0]) @@ -436,7 +436,7 @@ def purge_files(load_files, xml_flags, logger): return updated_list -if __name__ == '__main__': +def parse_args(): try: parser = argparse.ArgumentParser() # Allow user to choose dir for tmp files - default to user home @@ -450,10 +450,11 @@ def purge_files(load_files, xml_flags, logger): parser.add_argument("--loglevel", default=None, type=str, choices={"DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"}, help="Optional - specify log level. One of: DEBUG, INFO, WARNING, ERROR, CRITICAL.") # get the command line arguments - args = parser.parse_args() + return parser.parse_args() except: - print( - "*** %s occurred setting up met_db_load ***", sys.exc_info()[0]) + print("*** An error occurred parsing command line args ***") sys.exit("*** Error setting up met_db_load") - main(args) + +if __name__ == '__main__': + main(parse_args())