Skip to content

Commit

Permalink
Merge pull request #108 from aodn/RenameParquetDataQueries
Browse files Browse the repository at this point in the history
RenameParquetDataQueries
  • Loading branch information
lbesnard authored Dec 10, 2024
2 parents b368150 + c68c2fb commit 7a828e9
Show file tree
Hide file tree
Showing 164 changed files with 8,135 additions and 11,578 deletions.
4 changes: 2 additions & 2 deletions aodn_cloud_optimised/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .lib import ParquetDataQuery
from .lib import DataQuery
from .bin import create_aws_registry_dataset

__all__ = ["ParquetDataQuery", "create_aws_registry_dataset"]
__all__ = ["DataQuery", "create_aws_registry_dataset"]
107 changes: 38 additions & 69 deletions aodn_cloud_optimised/bin/argo.py
Original file line number Diff line number Diff line change
@@ -1,78 +1,47 @@
#!/usr/bin/env python3
import subprocess
import sys

ORGS = [
"csiro",
"kordi",
"bodc",
"coriolis",
"csio",
"incois",
"jma",
"aoml",
"nmdis",
"meds",
"kma",
]


def main():
# splitting the path in a few commands so that the clusters are being recreated to avoid memory issues:
# task 1
# command = [
# "generic_cloud_optimised_creation",
# "--paths",
# "IMOS/Argo/dac/csiro",
# "IMOS/Argo/dac/kordi",
# "IMOS/Argo/dac/bodc",
# "IMOS/Argo/dac/csio",
# "IMOS/Argo/dac/incois",
# "IMOS/Argo/dac/jma",
# "--suffix",
# "_prof.nc",
# "--dataset-config",
# "argo.json",
# # "--clear-existing-data",
# "--force-previous-parquet-deletion",
# "--cluster-mode",
# "coiled",
# ]
#
# # Run the command
# subprocess.run(command, check=True)

# task 2
command = [
"generic_cloud_optimised_creation",
"--paths",
"IMOS/Argo/dac/coriolis",
"--suffix",
"_prof.nc",
"--dataset-config",
"argo.json",
"--force-previous-parquet-deletion",
"--cluster-mode",
"coiled",
]
subprocess.run(command, check=True)
for i, org in enumerate(ORGS):
command = [
"generic_cloud_optimised_creation",
"--paths",
f"IMOS/Argo/dac/{org}",
"--suffix",
"_prof.nc",
"--dataset-config",
"argo.json",
"--force-previous-parquet-deletion",
"--cluster-mode",
"coiled",
]

# Add the `--clear-existing-data` flag if `i == 0`
if i == 0:
command.append("--clear-existing-data")

# Run the command
subprocess.run(command, check=True)

# task 2
command = [
"generic_cloud_optimised_creation",
"--paths",
"IMOS/Argo/dac/aoml",
"IMOS/Argo/dac/nmdis",
"IMOS/Argo/dac/meds",
"IMOS/Argo/dac/kma",
"--suffix",
"_prof.nc",
"--dataset-config",
"argo.json",
"--force-previous-parquet-deletion",
"--cluster-mode",
"coiled",
]
subprocess.run(command, check=True)

# task 3
command = [
"generic_cloud_optimised_creation",
"--paths",
"IMOS/Argo/dac/nmdis",
"IMOS/Argo/dac/meds",
"IMOS/Argo/dac/kma",
"--suffix",
"_prof.nc",
"--dataset-config",
"argo.json",
"--force-previous-parquet-deletion",
"--cluster-mode",
"coiled",
]
subprocess.run(command, check=True)
if __name__ == "__main__":
main()
sys.exit(0)
14 changes: 8 additions & 6 deletions aodn_cloud_optimised/bin/generic_cloud_optimised_creation.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
--dataset-config 'mooring_ctd_delayed_qc.json'
generic_cloud_optimised_creation --paths 'IMOS/ACORN/gridded_1h-avg-current-map_QC/TURQ/2024'
--dataset-config 'radar_TurquoiseCoast_velocity_hourly_average_delayed_qc.json' --clear-existing-data --cluster-mode 'coiled'
--dataset-config 'radar_TurquoiseCoast_velocity_hourly_averaged_delayed_qc.json' --clear-existing-data --cluster-mode 'coiled'
Arguments:
--paths: List of S3 paths to process. Example: 'IMOS/ANMN/NSW' 'IMOS/ANMN/PA'
Expand All @@ -37,6 +37,7 @@

import argparse
from importlib.resources import files
import warnings

from aodn_cloud_optimised.lib import clusterLib
from aodn_cloud_optimised.lib.CommonHandler import cloud_optimised_creation
Expand All @@ -53,7 +54,7 @@ def main():
epilog="Examples:\n"
" generic_cloud_optimised_creation --paths 'IMOS/ANMN/NSW' 'IMOS/ANMN/PA' --filters '_hourly-timeseries_' 'FV02' --dataset-config 'mooring_hourly_timeseries_delayed_qc.json' --clear-existing-data --cluster-mode 'coiled'\n"
" generic_cloud_optimised_creation --paths 'IMOS/ANMN/NSW' 'IMOS/ANMN/QLD' --dataset-config 'mooring_ctd_delayed_qc.json'\n"
" generic_cloud_optimised_creation --paths 'IMOS/ACORN/gridded_1h-avg-current-map_QC/TURQ/2024' --dataset-config 'radar_TurquoiseCoast_velocity_hourly_average_delayed_qc.json' --clear-existing-data --cluster-mode 'coiled'\n",
" generic_cloud_optimised_creation --paths 'IMOS/ACORN/gridded_1h-avg-current-map_QC/TURQ/2024' --dataset-config 'radar_TurquoiseCoast_velocity_hourly_averaged_delayed_qc.json' --clear-existing-data --cluster-mode 'coiled'\n",
formatter_class=argparse.RawTextHelpFormatter,
)

Expand Down Expand Up @@ -91,13 +92,13 @@ def main():
"Only for Parquet processing.",
)

coiled_cluster_options = [mode.value for mode in clusterLib.ClusterMode]
cluster_options = [mode.value for mode in clusterLib.ClusterMode]
parser.add_argument(
"--cluster-mode",
# type=clusterLib.parse_cluster_mode,
default=clusterLib.ClusterMode.NONE.value,
choices=coiled_cluster_options,
help="Cluster mode to use. Options: 'local' or 'coiled'. Default is None.",
choices=cluster_options,
help=f"Cluster mode to use. Options: {cluster_options}. Default is None.",
)

parser.add_argument(
Expand Down Expand Up @@ -138,7 +139,8 @@ def main():
) # make the list unique!

if not nc_obj_ls:
raise ValueError("No files found matching the specified criteria.")
warnings.warn("No files found matching the specified criteria.")
return False

# Load dataset config
dataset_config_path = args.dataset_config
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,7 @@ def main():

# Run the command
subprocess.run(command, check=True)


if __name__ == "__main__":
main()
4 changes: 4 additions & 0 deletions aodn_cloud_optimised/bin/mooring_ctd_delayed_qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,7 @@ def main():

# Run the command
subprocess.run(command, check=True)


if __name__ == "__main__":
main()
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,7 @@ def main():

# Run the command
subprocess.run(command, check=True)


if __name__ == "__main__":
main()
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,7 @@ def main():

# Run the command
subprocess.run(command, check=True)


if __name__ == "__main__":
main()
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,25 @@


def main():
command = [
"generic_cloud_optimised_creation",
"--paths",
"IMOS/ACORN/gridded_1h-avg-current-map_QC/BONC/",
# "--filters",
# "FILTER_STRING_1",
# "FILTER_STRING_1",
"--dataset-config",
"radar_BonneyCoast_velocity_hourly_averaged_delayed_qc.json",
"--clear-existing-data",
"--cluster-mode",
"coiled",
]
for i, year in enumerate(range(2007, 2025)):
command = [
"generic_cloud_optimised_creation",
"--paths",
f"IMOS/ACORN/gridded_1h-avg-current-map_QC/BONC/{year}",
# "--filters",
# ".nc",
"--dataset-config",
"radar_BonneyCoast_velocity_hourly_averaged_delayed_qc.json",
"--cluster-mode",
"coiled",
]

# Run the command
subprocess.run(command, check=True)
# Add --clear-existing-data for the first iteration only
if i == 0:
command.append("--clear-existing-data")

# Run the command
subprocess.run(command, check=True)


if __name__ == "__main__":
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,25 @@


def main():
command = [
"generic_cloud_optimised_creation",
"--paths",
"IMOS/ACORN/gridded_1h-avg-current-map_QC/CBG/",
# "--filters",
# "FILTER_STRING_1",
# "FILTER_STRING_1",
"--dataset-config",
"radar_CapricornBunkerGroup_velocity_hourly_averaged_delayed_qc.json",
"--clear-existing-data",
"--cluster-mode",
"coiled",
]
for i, year in enumerate(range(2007, 2025)):
command = [
"generic_cloud_optimised_creation",
"--paths",
f"IMOS/ACORN/gridded_1h-avg-current-map_QC/CBG/{year}",
# "--filters",
# ".nc",
"--dataset-config",
"radar_CapricornBunkerGroup_velocity_hourly_averaged_delayed_qc.json",
"--cluster-mode",
"coiled",
]

# Run the command
subprocess.run(command, check=True)
# Add --clear-existing-data for the first iteration only
if i == 0:
command.append("--clear-existing-data")

# Run the command
subprocess.run(command, check=True)


if __name__ == "__main__":
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,25 @@


def main():
command = [
"generic_cloud_optimised_creation",
"--paths",
"IMOS/ACORN/gridded_1h-avg-wave-site-map_QC/CBG/",
# "--filters",
# "FILTER_STRING_1",
# "FILTER_STRING_1",
"--dataset-config",
"radar_CapricornBunkerGroup_wave_delayed_qc.json",
"--clear-existing-data",
"--cluster-mode",
"coiled",
]
for i, year in enumerate(range(2007, 2025)):
command = [
"generic_cloud_optimised_creation",
"--paths",
f"IMOS/ACORN/gridded_1h-avg-wave-site-map_QC/CBG/{year}",
# "--filters",
# ".nc",
"--dataset-config",
"radar_CapricornBunkerGroup_wave_delayed_qc.json",
"--cluster-mode",
"coiled",
]

# Run the command
subprocess.run(command, check=True)
# Add --clear-existing-data for the first iteration only
if i == 0:
command.append("--clear-existing-data")

# Run the command
subprocess.run(command, check=True)


if __name__ == "__main__":
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,25 @@


def main():
command = [
"generic_cloud_optimised_creation",
"--paths",
"IMOS/ACORN/gridded_1h-avg-wind-map_QC/CBG/",
# "--filters",
# "FILTER_STRING_1",
# "FILTER_STRING_1",
"--dataset-config",
"radar_CapricornBunkerGroup_wind_delayed_qc.json",
"--clear-existing-data",
"--cluster-mode",
"coiled",
]
for i, year in enumerate(range(2007, 2025)):
command = [
"generic_cloud_optimised_creation",
"--paths",
f"IMOS/ACORN/gridded_1h-avg-wind-map_QC/CBG/{year}",
# "--filters",
# ".nc",
"--dataset-config",
"radar_CapricornBunkerGroup_wind_delayed_qc.json",
"--cluster-mode",
"coiled",
]

# Run the command
subprocess.run(command, check=True)
# Add --clear-existing-data for the first iteration only
if i == 0:
command.append("--clear-existing-data")

# Run the command
subprocess.run(command, check=True)


if __name__ == "__main__":
Expand Down
Loading

0 comments on commit 7a828e9

Please sign in to comment.