Skip to content

Commit

Permalink
Feat: update create_aws_registry function + update unittests to reflect changes
Browse files Browse the repository at this point in the history
  • Loading branch information
lbesnard committed Feb 4, 2025
1 parent 43bbe61 commit 815fda8
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 26 deletions.
53 changes: 30 additions & 23 deletions aodn_cloud_optimised/bin/create_aws_registry_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import io
import json
import os
import sys
import tempfile
from argparse import RawTextHelpFormatter
from importlib.resources import files
Expand Down Expand Up @@ -375,29 +376,7 @@ def convert_to_opendata_registry(json_file, output_directory):
handler.create_metadata_aws_registry(target_directory=output_directory)


def main():
"""
Main function to convert JSON files to AWS OpenData Registry format.
The script can be run in different ways:
1. Convert a specific JSON file to AWS OpenData Registry format.
2. Convert all JSON files in the directory.
3. Run interactively to list all available JSON files and prompt the user to choose one to convert.
Important:
If the -g option is provided, the script will download metadata from the GeoNetwork metadata
record and prompt the user to choose to replace existing values or not.
Args (optional):
-f, --file (str): Name of a specific JSON file to convert.
-d, --directory (str): Output directory to save converted YAML files.
-a, --all: Convert all JSON files in the directory.
-g, --geonetwork: Retrieve metadata fields from GeoNetwork3 metadata record
If the directory is not specified, a temporary directory is created.
"""
def parse_args(arg_list: list[str] | None):
parser = argparse.ArgumentParser(
description="""
Create AWS OpenData Registry YAML files from the dataset configuration, ready to be added to the OpenData Github
Expand Down Expand Up @@ -437,8 +416,36 @@ def main():

args = parser.parse_args()

return args


def main(arg_list: list[str] | None = None):
"""
Main function to convert JSON files to AWS OpenData Registry format.
The script can be run in different ways:
1. Convert a specific JSON file to AWS OpenData Registry format.
2. Convert all JSON files in the directory.
3. Run interactively to list all available JSON files and prompt the user to choose one to convert.
Important:
If the -g option is provided, the script will download metadata from the GeoNetwork metadata
record and prompt the user to choose to replace existing values or not.
Args (optional):
-f, --file (str): Name of a specific JSON file to convert.
-d, --directory (str): Output directory to save converted YAML files.
-a, --all: Convert all JSON files in the directory.
-g, --geonetwork: Retrieve metadata fields from GeoNetwork3 metadata record
If the directory is not specified, a temporary directory is created.
"""
json_directory = str(files("aodn_cloud_optimised.config.dataset")._paths[0])

args = parse_args(sys.argv[1:])

if args.all:
json_files = list_json_files(json_directory)
if json_files:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@
"parent_config": "radar_velocity_hourly_averaged_delayed_qc_main.json",
"metadata_uuid": "055342fc-f970-4be7-a764-8903220d42fb",
"aws_opendata_registry": {
"Name": "IMOS - ACORN - Turquoise Coast HF ocean radar site (Western Australia, Australia) - Delayed mode sea water velocity",
"Name": "Ocean Radar - Turquoise coast site - Sea water velocity - Delayed mode",
"Description": "The Turquoise Coast (TURQ) HF ocean radar system covers the area of shelf between Seabird and Jurien Bay and is the logical continuation of major research efforts to understand the role of the Leeuwin Current System (Leeuwin Current, the Leeuwin Undercurrent and Capes Current) in controlling not only the physical system but also its links to both pelagic and benthic ecosystems. In contrast to eastern ocean basins, which are highly productive, Western Australia experiences an oligotrophic environment. The Leeuwin Current is a shallow (<300 m deep), narrow band (< 100 km wide) of warm, lower salinity, nutrient depleted water of tropical origin that flows poleward from Exmouth to Cape Leeuwin and into the Great Australian Bight. The Current plays a dominant role in controlling the marine life and climate of the region. \n\nQuestions which may be addressed by using the HF ocean radar data from TURQ (and ROT) include the variability of the Leeuwin current and its response to the ENSO cycle; Leeuwin Current eddies and their interaction with the shelf waters; and the interaction between the Leeuwin Current, the Capes Current and coastal current during the summer. This is an important region for Western Rock lobster recruitment, and the meanders of the warm Leeuwin Current influence the ecology. This is a region with low tidal range and with a coastline subject to strong sea breezes and intense winter storms. Coastally trapped waves may be generated by the winter weather systems and by tropical cyclones in the summer. \n\nThe TURQ HF ocean radar system consists of two SeaSonde crossed loop direction finding stations located at Seabird (31.281 S 115.444 E) and Cervantes (30.506 S 115.060E). From 2012-12-15T11:00:00 the Cervantes station has been replaced by the Green Head station (30.073 S 114.967E) and from 2013-03-19T00:00:00 the Seabird station has been replaced by the Lancelin station (31.027 S 115.328 E). 
These radars operate at a frequency of 5.211 MHz, with a bandwidth of 50 KHz, a maximum range of 200 Km\nand a range resolution of 3 Km. Within the HF radar coverage area surface currents are measured.\n\nThe TURQ area of coverage has a small overlap of commonly observed ocean with the Rottnest Shelf (ROT) WERA HF ocean radar system on its south side. Together, the TURQ and ROT systems provide continuous monitoring of the shelf from Fremantle to Jurien Bay.",
"Documentation": "https://catalogue-imos.aodn.org.au/geonetwork/srv/eng/catalog.search#/metadata/055342fc-f970-4be7-a764-8903220d42fb",
"Contact": "[email protected]",
"ManagedBy": "AODN",
"UpdateFrequency": "As Needed",
"License": "http://creativecommons.org/licenses/by/4.0/",
"Citation": "IMOS [year-of-data-download], [Title], [data-access-URL], accessed [date-of-access]",
"Citation": "The citation in a list of references is: \"IMOS [year-of-data-download], [Title], [data-access-URL], accessed [date-of-access].\"",
"DataAtWork": {
"Tutorials": [
{
Expand All @@ -37,6 +37,11 @@
"Region": "ap-southeast-2",
"Type": "S3 Bucket"
}
],
"Tags": [
"oceans",
"ocean currents",
"ocean velocity"
]
}
}
11 changes: 10 additions & 1 deletion test_aodn_cloud_optimised/test_create_aws_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@
)
DATASET_CONFIG_NC_ACORN_JSON = Path(DATASET_CONFIG_NC_ACORN_JSON)

CSV_EXTRA_INFO = os.path.join(ROOT_DIR, "resources", "IMOSPortalCollections.csv")

CSV_EXTRA_INFO = Path(CSV_EXTRA_INFO)


class TestGenericCloudOptimisedCreation(unittest.TestCase):
def setUp(self):
Expand All @@ -36,7 +40,12 @@ def tearDown(self):
def test_main(self, mock_parse_args):
# Prepare mock arguments
mock_parse_args.return_value = MagicMock(
file=DATASET_CONFIG_NC_ACORN_JSON, directory=self.tempdir, all=False
file=DATASET_CONFIG_NC_ACORN_JSON,
directory=self.tempdir,
all=False,
csv_path=CSV_EXTRA_INFO,
geonetwork=False,
# csv_path=None,
)

# Capture logs
Expand Down

0 comments on commit 815fda8

Please sign in to comment.