From 815fda8c6b1a0b873843f0f60ad8bf7a66c04c3e Mon Sep 17 00:00:00 2001 From: lbesnard Date: Tue, 4 Feb 2025 14:02:55 +1100 Subject: [PATCH] Feat: update create_aws_registry function + update unittests to reflect changes --- .../bin/create_aws_registry_dataset.py | 53 +++++++++++-------- ...t_velocity_hourly_averaged_delayed_qc.json | 9 +++- .../test_create_aws_registry.py | 11 +++- 3 files changed, 47 insertions(+), 26 deletions(-) diff --git a/aodn_cloud_optimised/bin/create_aws_registry_dataset.py b/aodn_cloud_optimised/bin/create_aws_registry_dataset.py index 400f70f..8237b48 100644 --- a/aodn_cloud_optimised/bin/create_aws_registry_dataset.py +++ b/aodn_cloud_optimised/bin/create_aws_registry_dataset.py @@ -27,6 +27,7 @@ import io import json import os +import sys import tempfile from argparse import RawTextHelpFormatter from importlib.resources import files @@ -375,29 +376,7 @@ def convert_to_opendata_registry(json_file, output_directory): handler.create_metadata_aws_registry(target_directory=output_directory) -def main(): - """ - Main function to convert JSON files to AWS OpenData Registry format. - - The script can be run in different ways: - - 1. Convert a specific JSON file to AWS OpenData Registry format. - 2. Convert all JSON files in the directory. - 3. Run interactively to list all available JSON files and prompt the user to choose one to convert. - - Important: - If the -g option is provided, the script will download metadata from the GeoNetwork metadata - record and prompt the user to choose to replace existing values or not. - - - Args (optional): - -f, --file (str): Name of a specific JSON file to convert. - -d, --directory (str): Output directory to save converted YAML files. - -a, --all: Convert all JSON files in the directory. - -g, --geonetwork: Retrieve metadata fields from GeoNetwork3 metadata record - - If the directory is not specified, a temporary directory is created. 
- """ +def parse_args(arg_list: list[str] | None): parser = argparse.ArgumentParser( description=""" Create AWS OpenData Registry YAML files from the dataset configuration, ready to be added to the OpenData Github @@ -437,8 +416,37 @@ def main(): - args = parser.parse_args() + args = parser.parse_args(arg_list) + return args + + +def main(arg_list: list[str] | None = None): + """ + Main function to convert JSON files to AWS OpenData Registry format. + + The script can be run in different ways: + + 1. Convert a specific JSON file to AWS OpenData Registry format. + 2. Convert all JSON files in the directory. + 3. Run interactively to list all available JSON files and prompt the user to choose one to convert. + + Important: + If the -g option is provided, the script will download metadata from the GeoNetwork metadata + record and prompt the user to choose to replace existing values or not. + + + Args (optional): + arg_list (list[str] | None): Arguments to parse; when None, argparse reads sys.argv[1:]. + -f, --file (str): Name of a specific JSON file to convert. + -d, --directory (str): Output directory to save converted YAML files. + -a, --all: Convert all JSON files in the directory. + -g, --geonetwork: Retrieve metadata fields from GeoNetwork3 metadata record + + If the directory is not specified, a temporary directory is created. 
+ """ json_directory = str(files("aodn_cloud_optimised.config.dataset")._paths[0]) + args = parse_args(arg_list) + if args.all: json_files = list_json_files(json_directory) if json_files: diff --git a/test_aodn_cloud_optimised/resources/radar_TurquoiseCoast_velocity_hourly_averaged_delayed_qc.json b/test_aodn_cloud_optimised/resources/radar_TurquoiseCoast_velocity_hourly_averaged_delayed_qc.json index 1a39514..6c3efb5 100644 --- a/test_aodn_cloud_optimised/resources/radar_TurquoiseCoast_velocity_hourly_averaged_delayed_qc.json +++ b/test_aodn_cloud_optimised/resources/radar_TurquoiseCoast_velocity_hourly_averaged_delayed_qc.json @@ -4,14 +4,14 @@ "parent_config": "radar_velocity_hourly_averaged_delayed_qc_main.json", "metadata_uuid": "055342fc-f970-4be7-a764-8903220d42fb", "aws_opendata_registry": { - "Name": "IMOS - ACORN - Turquoise Coast HF ocean radar site (Western Australia, Australia) - Delayed mode sea water velocity", + "Name": "Ocean Radar - Turquoise coast site - Sea water velocity - Delayed mode", "Description": "The Turquoise Coast (TURQ) HF ocean radar system covers the area of shelf between Seabird and Jurien Bay and is the logical continuation of major research efforts to understand the role of the Leeuwin Current System (Leeuwin Current, the Leeuwin Undercurrent and Capes Current) in controlling not only the physical system but also its links to both pelagic and benthic ecosystems. In contrast to eastern ocean basins, which are highly productive, Western Australia experiences an oligotrophic environment. The Leeuwin Current is a shallow (<300 m deep), narrow band (< 100 km wide) of warm, lower salinity, nutrient depleted water of tropical origin that flows poleward from Exmouth to Cape Leeuwin and into the Great Australian Bight. The Current plays a dominant role in controlling the marine life and climate of the region. 
\n\nQuestions which may be addressed by using the HF ocean radar data from TURQ (and ROT) include the variability of the Leeuwin current and its response to the ENSO cycle; Leeuwin Current eddies and their interaction with the shelf waters; and the interaction between the Leeuwin Current, the Capes Current and coastal current during the summer. This is an important region for Western Rock lobster recruitment, and the meanders of the warm Leeuwin Current influence the ecology. This is a region with low tidal range and with a coastline subject to strong sea breezes and intense winter storms. Coastally trapped waves may be generated by the winter weather systems and by tropical cyclones in the summer. \n\nThe TURQ HF ocean radar system consists of two SeaSonde crossed loop direction finding stations located at Seabird (31.281 S 115.444 E) and Cervantes (30.506 S 115.060E). From 2012-12-15T11:00:00 the Cervantes station has been replaced by the Green Head station (30.073 S 114.967E) and from 2013-03-19T00:00:00 the Seabird station has been replaced by the Lancelin station (31.027 S 115.328 E). These radars operate at a frequency of 5.211 MHz, with a bandwidth of 50 KHz, a maximum range of 200 Km\nand a range resolution of 3 Km. Within the HF radar coverage area surface currents are measured.\n\nThe TURQ area of coverage has a small overlap of commonly observed ocean with the Rottnest Shelf (ROT) WERA HF ocean radar system on its south side. 
Together, the TURQ and ROT systems provide continuous monitoring of the shelf from Fremantle to Jurien Bay.", "Documentation": "https://catalogue-imos.aodn.org.au/geonetwork/srv/eng/catalog.search#/metadata/055342fc-f970-4be7-a764-8903220d42fb", "Contact": "info@aodn.org.au", "ManagedBy": "AODN", "UpdateFrequency": "As Needed", "License": "http://creativecommons.org/licenses/by/4.0/", - "Citation": "IMOS [year-of-data-download], [Title], [data-access-URL], accessed [date-of-access]", + "Citation": "The citation in a list of references is: \"IMOS [year-of-data-download], [Title], [data-access-URL], accessed [date-of-access].\"", "DataAtWork": { "Tutorials": [ { @@ -37,6 +37,11 @@ "Region": "ap-southeast-2", "Type": "S3 Bucket" } + ], + "Tags": [ + "oceans", + "ocean currents", + "ocean velocity" ] } } diff --git a/test_aodn_cloud_optimised/test_create_aws_registry.py b/test_aodn_cloud_optimised/test_create_aws_registry.py index 5f00704..748d4b5 100644 --- a/test_aodn_cloud_optimised/test_create_aws_registry.py +++ b/test_aodn_cloud_optimised/test_create_aws_registry.py @@ -20,6 +20,10 @@ ) DATASET_CONFIG_NC_ACORN_JSON = Path(DATASET_CONFIG_NC_ACORN_JSON) +CSV_EXTRA_INFO = os.path.join(ROOT_DIR, "resources", "IMOSPortalCollections.csv") + +CSV_EXTRA_INFO = Path(CSV_EXTRA_INFO) + class TestGenericCloudOptimisedCreation(unittest.TestCase): def setUp(self): @@ -36,7 +40,12 @@ def tearDown(self): def test_main(self, mock_parse_args): # Prepare mock arguments mock_parse_args.return_value = MagicMock( - file=DATASET_CONFIG_NC_ACORN_JSON, directory=self.tempdir, all=False + file=DATASET_CONFIG_NC_ACORN_JSON, + directory=self.tempdir, + all=False, + csv_path=CSV_EXTRA_INFO, + geonetwork=False, + # csv_path=None, ) # Capture logs