diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 72b2514..a857ba4 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -59,9 +59,6 @@ jobs: source .venv/bin/activate poetry run pytest - - name: Build package - run: poetry build - # Configure git for committing version bump - name: Configure git for committing version bump run: | @@ -82,6 +79,10 @@ jobs: run: | git push origin HEAD:main + # build the package after bumping version + - name: Build package + run: poetry build + - name: Delete existing tag (if any) run: | git tag -d v${{ env.new_version }} || true diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0de3735..648fcee 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -13,6 +13,7 @@ repos: files: \.json$ exclude: ^aodn_cloud_optimised/config/dataset/dataset_template.json$ - id: end-of-file-fixer + exclude: '\.schema$' - id: trailing-whitespace - id: check-toml diff --git a/README.md b/README.md index cbb2637..e509a99 100644 --- a/README.md +++ b/README.md @@ -5,24 +5,44 @@ ![Release](https://img.shields.io/github/v/release/aodn/aodn_cloud_optimised.svg) [![codecov](https://codecov.io/gh/aodn/aodn_cloud_optimised/branch/main/graph/badge.svg)](https://codecov.io/gh/aodn/aodn_cloud_optimised/branch/main) -A tool designed to convert IMOS NetCDF files and CSV into Cloud Optimised formats such as Zarr and Parquet +A tool designed to convert IMOS NetCDF and CSV files into Cloud Optimised formats such as Zarr and Parquet ## Key Features -* Conversion of a dataset with YAML Configuration: Convert tabular data (CSV or NetCDF) to Parquet and gridded data to Zarr using YAML configuration files only. 
-* Preservation of NetCDF Metadata: Maintain NetCDF global attributes metadata -* Improve Querying of Cloud Optimised data by Geographical Bounding box and Time Slice: Create geometry polygon and time slice partitions for Parquet dataset, facilitating efficient data querying by time and geographical bounding box. -* Data Reprocessing: Easily reprocess NetCDF files into Zarr and Parquet formats +* Conversion of CSV/NetCDF to Cloud Optimised format (Zarr/Parquet) + * YAML configuration approach with parent and child YAML configuration if multiple datasets are very similar (e.g. Radar ACORN, GHRSST, see [config](https://github.com/aodn/aodn_cloud_optimised/tree/main/aodn_cloud_optimised/config/dataset)) + * Generic handlers for most datasets ([GenericParquetHandler](https://github.com/aodn/aodn_cloud_optimised/blob/main/aodn_cloud_optimised/lib/GenericParquetHandler.py), [GenericZarrHandler](https://github.com/aodn/aodn_cloud_optimised/blob/main/aodn_cloud_optimised/lib/GenericZarrHandler.py)). + * Specific handlers can be written and inherit methods from a generic handler ([Argo handler](https://github.com/aodn/aodn_cloud_optimised/blob/main/aodn_cloud_optimised/lib/ArgoHandler.py), [Mooring Timeseries Handler](https://github.com/aodn/aodn_cloud_optimised/blob/main/aodn_cloud_optimised/lib/AnmnHourlyTsHandler.py)) +* Clustering capability: + * Local dask cluster + * Remote Coiled cluster + * driven by configuration/can be easily overridden + * Zarr: gridded datasets are processed in batch and in parallel with xarray.open_mfdataset + * Parquet: tabular files are processed in batch and in parallel as independent tasks, using futures +* Reprocessing: + * Zarr: reprocessing is achieved by writing to specific regions with slices. 
Non-contiguous regions are handled + * Parquet: reprocessing is done via pyarrow internal overwriting function, but can also be forced in case an input file has significantly changed +* Chunking: + * Parquet: to facilitate the query of geospatial data, polygon and timestamp slices are created as partitions + * Zarr: done via dataset configuration +* Metadata: + * Parquet: Metadata is created as a sidecar _metadata.parquet file +* Unittesting of module: Very close to integration testing, local cluster is used to create cloud optimised files + # Installation ## Users Requirements: * python >= 3.10.14 +### automatic install of latest wheel release ```bash curl -s https://raw.githubusercontent.com/aodn/aodn_cloud_optimised/main/install.sh | bash ``` +Otherwise go to +github.com/aodn/aodn_cloud_optimised/releases/latest + ## Development Requirements: * Mamba from miniforge3: https://github.com/conda-forge/miniforge @@ -46,57 +66,76 @@ to update the poetry.lock file. Commit the changes to poetry.lock # Requirements AWS SSO to push files to S3 -# Features List - -## Parquet Features -| Feature | Status | Comment | -|------------------------------------------------------------------------------------------------|--------|------------------------------------------------------------------------------------| -| Process IMOS tabular NetCDF to Parquet with GenericHandler | Done | Converts NetCDF files to Parquet format using a generic handler. | -| Process CSV to Parquet with GenericHandler | Done | Converts CSV files to Parquet format using a generic handler. | -| Specific Handlers inherit all methods from GenericHandler with super() | Done | Simplifies the creation of new handlers by inheriting methods. | -| Unittests implemented | Done | Tests to ensure functionality and reliability. | -| Reprocessing of files already converted to Parquet | Done | Reprocessing of NetCDF files; original method can be slow for large datasets. 
| -| Metadata variable attributes in sidecar parquet file | Done | Metadata attributes available in dataset sidecars. | -| Add new variables to dataset | Done | Addition of new variables such as site_code, deployment_code, filename attributes. | -| Add timestamp variable for partition key | Done | Enhances query performance by adding a timestamp variable. | -| Remove NaN timestamp when NetCDF not CF compliant | Done | Eliminates NaN timestamps, particularly for non CF compliant data like Argo. | -| Create dataset Schema | Done | Creation of a schema for the dataset. | -| Create missing variables available in Schema | Done | Ensures dataset consistency by adding missing variables from the schema. | -| Warning when new variable from NetCDF is missing from Schema | Done | Alerts when a new variable from NetCDF is absent in the schema. | -| Creating metadata parquet sidecar | Done | | -| Create AWS OpenData Registry Yaml | Done | -| Config file JSON validation against schema | Done | -| Create polygon variable to facilite geometry queries | Done | - -## Zarr Features -| Feature | Status | Comment | -|------------------------------------------------------------------------|--------|------------------------------------------------------------------------------------| -| Process IMOS Gridded NetCDF to Zarr with GenericHandler | Done | Converts NetCDF files to Parquet format using a generic handler. | -| Specific Handlers inherit all methods from GenericHandler with super() | Done | Simplifies the creation of new handlers by inheriting methods. | +# Usage +## As a standalone bash script +```bash +generic_cloud_optimised_creation -h +usage: generic_cloud_optimised_creation [-h] --paths PATHS [PATHS ...] [--filters [FILTERS ...]] [--suffix SUFFIX] --dataset-config + DATASET_CONFIG [--clear-existing-data] [--force-previous-parquet-deletion] + [--cluster-mode {local,remote}] + +Process S3 paths and create cloud-optimized datasets. 
+ +options: + -h, --help show this help message and exit + --paths PATHS [PATHS ...] + List of S3 paths to process. Example: 'IMOS/ANMN/NSW' 'IMOS/ANMN/PA' + --filters [FILTERS ...] + Optional filter strings to apply on the S3 paths. Example: '_hourly-timeseries_' 'FV02' + --suffix SUFFIX Optional suffix used by s3_ls to filter S3 objects. Example: '.nc' + --dataset-config DATASET_CONFIG + Path to the dataset config JSON file. Example: 'anmn_hourly_timeseries.json' + --clear-existing-data + Flag to clear existing data. Default is False. + --force-previous-parquet-deletion + Flag to force the search of previous equivalent parquet file created. Much slower. Default is False. + --cluster-mode {local,remote} + Cluster mode to use. Options: 'local' or 'remote'. Default is 'local'. + +Examples: + generic_cloud_optimised_creation --paths 'IMOS/ANMN/NSW' 'IMOS/ANMN/PA' --filters '_hourly-timeseries_' 'FV02' --dataset-config 'anmn_hourly_timeseries.json' --clear-existing-data --cluster-mode 'remote' + generic_cloud_optimised_creation --paths 'IMOS/ANMN/NSW' 'IMOS/ANMN/QLD' --dataset-config 'anmn_ctd_ts_fv01.json' + generic_cloud_optimised_creation --paths 'IMOS/ACORN/gridded_1h-avg-current-map_QC/TURQ/2024' --dataset-config 'acorn_gridded_qc_turq.json' --clear-existing-data --cluster-mode 'remote' -# Usage +``` -## Parquet -The GenericHandler for parquet dataset creation is designed to be used either as a standalone class or as a base class for more specialised handler implementations. 
Here's a basic usage example: +## As a python module ```python -# Read the content of the dataset template JSON file (with comments) -#import commentjson -#with open('aodn_cloud_optimised/config/dataset/dataset_template.json', 'r') as file: -# json_with_comments = file.read() -#dataset_config = commentjson.loads(json_with_comments) - import importlib.resources -from aodn_cloud_optimised.lib.config import load_dataset_config -from aodn_cloud_optimised.lib.CommonHandler import cloud_optimised_creation -dataset_config = load_dataset_config(str(importlib.resources.path("aodn_cloud_optimised.config.dataset", "anfog_slocum_glider.json"))) - -cloud_optimised_creation('object/path/netcdf_file.nc', - dataset_config=dataset_config - ) +from aodn_cloud_optimised.lib.CommonHandler import cloud_optimised_creation +from aodn_cloud_optimised.lib.config import ( + load_variable_from_config, + load_dataset_config, +) +from aodn_cloud_optimised.lib.s3Tools import s3_ls + + +def main(): + BUCKET_RAW_DEFAULT = load_variable_from_config("BUCKET_RAW_DEFAULT") + nc_obj_ls = s3_ls(BUCKET_RAW_DEFAULT, "IMOS/SRS/SST/ghrsst/L3S-1d/dn/2024") + + dataset_config = load_dataset_config( + str( + importlib.resources.path( + "aodn_cloud_optimised.config.dataset", "srs_l3s_1d_dn.json" + ) + ) + ) + + cloud_optimised_creation( + nc_obj_ls, + dataset_config=dataset_config, + clear_existing_data=True, + cluster_mode='remote' + ) + + +if __name__ == "__main__": + main() ``` diff --git a/README_add_new_dataset.md b/README_add_new_dataset.md index 045e622..5cf49ce 100644 --- a/README_add_new_dataset.md +++ b/README_add_new_dataset.md @@ -1,4 +1,9 @@ +# Creating a dataset configuration file + + This module aims to be generic enough so that adding a new IMOS dataset is driven through a json config file. +Examples of dataset configuration can be found at [config](https://github.com/aodn/aodn_cloud_optimised/tree/main/aodn_cloud_optimised/config/dataset). 
+ For more complicated dataset, such as Argo for example, it's also possible to create a specific handler which would inherit with ```Super()``` all of the methods for the ```aodn_cloud_optimised.lib.GenericParquetHandler.GenericHandler``` class. diff --git a/aodn_cloud_optimised/bin/aatams_acoustic_tagging.py b/aodn_cloud_optimised/bin/aatams_acoustic_tagging.py index 2f35a53..e25d758 100755 --- a/aodn_cloud_optimised/bin/aatams_acoustic_tagging.py +++ b/aodn_cloud_optimised/bin/aatams_acoustic_tagging.py @@ -1,31 +1,21 @@ #!/usr/bin/env python3 -import importlib.resources - -from aodn_cloud_optimised.lib.CommonHandler import cloud_optimised_creation_loop -from aodn_cloud_optimised.lib.config import ( - load_variable_from_config, - load_dataset_config, -) -from aodn_cloud_optimised.lib.s3Tools import s3_ls +import subprocess def main(): - BUCKET_RAW_DEFAULT = load_variable_from_config("BUCKET_RAW_DEFAULT") - obj_ls = s3_ls(BUCKET_RAW_DEFAULT, "IMOS/AATAMS/acoustic_tagging/", suffix=".csv") - - dataset_config = load_dataset_config( - str( - importlib.resources.path( - "aodn_cloud_optimised.config.dataset", "aatams_acoustic_tagging.json" - ) - ) - ) - - cloud_optimised_creation_loop( - obj_ls, - dataset_config=dataset_config, - ) - + command = [ + "generic_cloud_optimised_creation", + "--paths", + "IMOS/AATAMS/acoustic_tagging/", + # Source objects are CSV files; the CLI's --suffix defaults to ".nc" + "--suffix", + ".csv", + "--dataset-config", + "aatams_acoustic_tagging.json", + "--clear-existing-data", + "--cluster-mode", + "remote", + ] -if __name__ == "__main__": - main() + # Run the command + subprocess.run(command, check=True) diff --git a/aodn_cloud_optimised/bin/acorn_gridded_qc_turq.py b/aodn_cloud_optimised/bin/acorn_gridded_qc_turq.py old mode 100644 new mode 100755 index 1a1bc33..3c77429 --- a/aodn_cloud_optimised/bin/acorn_gridded_qc_turq.py +++ b/aodn_cloud_optimised/bin/acorn_gridded_qc_turq.py @@ -1,43 +1,18 @@ #!/usr/bin/env python3 -import importlib.resources - -from aodn_cloud_optimised.lib.GenericZarrHandler import GenericHandler -from 
aodn_cloud_optimised.lib.CommonHandler import cloud_optimised_creation_loop - -from aodn_cloud_optimised.lib.config import ( - load_variable_from_config, - load_dataset_config, -) -from aodn_cloud_optimised.lib.s3Tools import s3_ls +import subprocess def main(): - BUCKET_RAW_DEFAULT = load_variable_from_config("BUCKET_RAW_DEFAULT") - nc_obj_ls = s3_ls( - BUCKET_RAW_DEFAULT, "IMOS/ACORN/gridded_1h-avg-current-map_QC/TURQ/2023" - ) - - dataset_config = load_dataset_config( - str( - importlib.resources.path( - "aodn_cloud_optimised.config.dataset", "acorn_gridded_qc_turq.json" - ) - ) - ) - - # First zarr creation - cloud_optimised_creation_loop( - [nc_obj_ls[0]], dataset_config=dataset_config, reprocess=True - ) - - # append to zarr - cloud_optimised_creation_loop(nc_obj_ls[1:], dataset_config=dataset_config) - # rechunking - GenericHandler( - input_object_key=nc_obj_ls[0], - dataset_config=dataset_config, - ).rechunk() - - -if __name__ == "__main__": - main() + command = [ + "generic_cloud_optimised_creation", + "--paths", + "IMOS/ACORN/gridded_1h-avg-current-map_QC/TURQ/2024/01/", + "--dataset-config", + "acorn_gridded_qc_turq.json", + "--clear-existing-data", + "--cluster-mode", + "remote", + ] + + # Run the command + subprocess.run(command, check=True) diff --git a/aodn_cloud_optimised/bin/anfog_to_parquet.py b/aodn_cloud_optimised/bin/anfog_to_parquet.py index ac092de..0ba1e99 100755 --- a/aodn_cloud_optimised/bin/anfog_to_parquet.py +++ b/aodn_cloud_optimised/bin/anfog_to_parquet.py @@ -1,28 +1,18 @@ #!/usr/bin/env python3 -import importlib.resources - -from aodn_cloud_optimised.lib.CommonHandler import cloud_optimised_creation_loop -from aodn_cloud_optimised.lib.config import ( - load_variable_from_config, - load_dataset_config, -) -from aodn_cloud_optimised.lib.s3Tools import s3_ls +import subprocess def main(): - BUCKET_RAW_DEFAULT = load_variable_from_config("BUCKET_RAW_DEFAULT") - nc_obj_ls = s3_ls(BUCKET_RAW_DEFAULT, "IMOS/ANFOG/slocum_glider") - - 
dataset_config = load_dataset_config( - str( - importlib.resources.path( - "aodn_cloud_optimised.config.dataset", "anfog_slocum_glider.json" - ) - ) - ) - - cloud_optimised_creation_loop(nc_obj_ls, dataset_config=dataset_config) - + command = [ + "generic_cloud_optimised_creation", + "--paths", + "IMOS/ANFOG/slocum_glider", + "--dataset-config", + "anfog_slocum_glider.json", + "--clear-existing-data", + "--cluster-mode", + "remote", + ] -if __name__ == "__main__": - main() + # Run the command + subprocess.run(command, check=True) diff --git a/aodn_cloud_optimised/bin/anmn_aqualogger_to_parquet.py b/aodn_cloud_optimised/bin/anmn_aqualogger_to_parquet.py index 59b0f35..a1f727b 100755 --- a/aodn_cloud_optimised/bin/anmn_aqualogger_to_parquet.py +++ b/aodn_cloud_optimised/bin/anmn_aqualogger_to_parquet.py @@ -1,43 +1,26 @@ #!/usr/bin/env python3 -import importlib.resources - -from aodn_cloud_optimised.lib.CommonHandler import cloud_optimised_creation_loop -from aodn_cloud_optimised.lib.config import ( - load_variable_from_config, - load_dataset_config, -) -from aodn_cloud_optimised.lib.s3Tools import s3_ls +import subprocess def main(): - - BUCKET_RAW_DEFAULT = load_variable_from_config("BUCKET_RAW_DEFAULT") - - nc_obj_ls = ( - s3_ls(BUCKET_RAW_DEFAULT, "IMOS/ANMN/NSW") - + s3_ls(BUCKET_RAW_DEFAULT, "IMOS/ANMN/PA") - + s3_ls(BUCKET_RAW_DEFAULT, "IMOS/ANMN/QLD") - + s3_ls(BUCKET_RAW_DEFAULT, "IMOS/ANMN/SA") - + s3_ls(BUCKET_RAW_DEFAULT, "IMOS/ANMN/WA") - ) - - # Aqualogger - temperature_logger_ts_fv01_ls = [ - s for s in nc_obj_ls if ("/Temperature/" in s) and ("FV01" in s) + command = [ + "generic_cloud_optimised_creation", + "--paths", + "IMOS/ANMN/NSW", + "IMOS/ANMN/PA", + "IMOS/ANMN/QLD", + "IMOS/ANMN/SA", + "IMOS/ANMN/WA", + # One argv element per filter: each is matched as a literal substring of the S3 key + "--filters", + "/Temperature/", + "FV01", + "--dataset-config", + "anmn_temperature_logger_ts_fv01.json", + "--clear-existing-data", + "--cluster-mode", + "remote", ] - dataset_config = load_dataset_config( - str( - importlib.resources.path( - 
"aodn_cloud_optimised.config.dataset", - "anmn_temperature_logger_ts_fv01.json", - ) - ) - ) - - cloud_optimised_creation_loop( - temperature_logger_ts_fv01_ls, dataset_config=dataset_config - ) - -if __name__ == "__main__": - main() + # Run the command + subprocess.run(command, check=True) diff --git a/aodn_cloud_optimised/bin/anmn_ctd_to_parquet.py b/aodn_cloud_optimised/bin/anmn_ctd_to_parquet.py index a06b275..19c9509 100755 --- a/aodn_cloud_optimised/bin/anmn_ctd_to_parquet.py +++ b/aodn_cloud_optimised/bin/anmn_ctd_to_parquet.py @@ -1,38 +1,25 @@ #!/usr/bin/env python3 -import importlib.resources - -from aodn_cloud_optimised.lib.CommonHandler import cloud_optimised_creation_loop -from aodn_cloud_optimised.lib.config import ( - load_variable_from_config, - load_dataset_config, -) -from aodn_cloud_optimised.lib.s3Tools import s3_ls +import subprocess def main(): - - BUCKET_RAW_DEFAULT = load_variable_from_config("BUCKET_RAW_DEFAULT") - - nc_obj_ls = ( - s3_ls(BUCKET_RAW_DEFAULT, "IMOS/ANMN/NSW") - + s3_ls(BUCKET_RAW_DEFAULT, "IMOS/ANMN/PA") - + s3_ls(BUCKET_RAW_DEFAULT, "IMOS/ANMN/QLD") - + s3_ls(BUCKET_RAW_DEFAULT, "IMOS/ANMN/SA") - + s3_ls(BUCKET_RAW_DEFAULT, "IMOS/ANMN/WA") - ) - - # CTD - ctd_ts_fv01_ls = [s for s in nc_obj_ls if ("CTD_timeseries" in s) and ("FV01" in s)] - dataset_config = load_dataset_config( - str( - importlib.resources.path( - "aodn_cloud_optimised.config.dataset", "anmn_ctd_ts_fv01.json" - ) - ) - ) - - cloud_optimised_creation_loop(ctd_ts_fv01_ls, dataset_config=dataset_config) - - -if __name__ == "__main__": - main() + command = [ + "generic_cloud_optimised_creation", + "--paths", + "IMOS/ANMN/NSW", + "IMOS/ANMN/PA", + "IMOS/ANMN/QLD", + "IMOS/ANMN/SA", + "IMOS/ANMN/WA", + "--filters", + "/CTD_timeseries/", + "FV01", + "--dataset-config", + "anmn_ctd_ts_fv01.json", + "--clear-existing-data", + "--cluster-mode", + "remote", + ] + + # Run the command + subprocess.run(command, check=True) diff --git 
a/aodn_cloud_optimised/bin/anmn_hourly_timeseries.py b/aodn_cloud_optimised/bin/anmn_hourly_timeseries.py old mode 100644 new mode 100755 index 9b48a7b..41be9ec --- a/aodn_cloud_optimised/bin/anmn_hourly_timeseries.py +++ b/aodn_cloud_optimised/bin/anmn_hourly_timeseries.py @@ -1,40 +1,25 @@ #!/usr/bin/env python3 -import importlib.resources - -from aodn_cloud_optimised.lib.CommonHandler import cloud_optimised_creation_loop -from aodn_cloud_optimised.lib.config import ( - load_variable_from_config, - load_dataset_config, -) -from aodn_cloud_optimised.lib.s3Tools import s3_ls -from aodn_cloud_optimised.lib.AnmnHourlyTsHandler import AnmnHourlyTsHandler +import subprocess def main(): - - BUCKET_RAW_DEFAULT = load_variable_from_config("BUCKET_RAW_DEFAULT") - - nc_obj_ls = ( - s3_ls(BUCKET_RAW_DEFAULT, "IMOS/ANMN/NSW") - + s3_ls(BUCKET_RAW_DEFAULT, "IMOS/ANMN/PA") - + s3_ls(BUCKET_RAW_DEFAULT, "IMOS/ANMN/QLD") - + s3_ls(BUCKET_RAW_DEFAULT, "IMOS/ANMN/SA") - + s3_ls(BUCKET_RAW_DEFAULT, "IMOS/ANMN/WA") - ) - - nc_obj_ls = [s for s in nc_obj_ls if ("_hourly-timeseries_" in s) and ("FV02" in s)] - dataset_config = load_dataset_config( - str( - importlib.resources.path( - "aodn_cloud_optimised.config.dataset", "anmn_hourly_timeseries.json" - ) - ) - ) - - cloud_optimised_creation_loop( - nc_obj_ls, dataset_config=dataset_config, handler_class=AnmnHourlyTsHandler - ) - - -if __name__ == "__main__": - main() + command = [ + "generic_cloud_optimised_creation", + "--paths", + "IMOS/ANMN/NSW", + "IMOS/ANMN/PA", + "IMOS/ANMN/QLD", + "IMOS/ANMN/SA", + "IMOS/ANMN/WA", + "--filters", + "_hourly-timeseries_", + "FV02", + "--dataset-config", + "anmn_hourly_timeseries.json", + "--clear-existing-data", + "--cluster-mode", + "remote", + ] + + # Run the command + subprocess.run(command, check=True) diff --git a/aodn_cloud_optimised/bin/ardc_wave_to_parquet.py b/aodn_cloud_optimised/bin/ardc_wave_to_parquet.py index 456d5ee..8ef169e 100755 --- 
a/aodn_cloud_optimised/bin/ardc_wave_to_parquet.py +++ b/aodn_cloud_optimised/bin/ardc_wave_to_parquet.py @@ -1,35 +1,20 @@ #!/usr/bin/env python3 -import importlib.resources - -from aodn_cloud_optimised.lib.CommonHandler import cloud_optimised_creation_loop -from aodn_cloud_optimised.lib.config import ( - load_variable_from_config, - load_dataset_config, -) -from aodn_cloud_optimised.lib.s3Tools import s3_ls +import subprocess def main(): - BUCKET_RAW_DEFAULT = load_variable_from_config("BUCKET_RAW_DEFAULT") - nc_obj_ls = ( - s3_ls( - BUCKET_RAW_DEFAULT, - "Department_of_Transport-Western_Australia/WAVE-BUOYS/REALTIME/", - ) - + s3_ls(BUCKET_RAW_DEFAULT, "Bureau_of_Meteorology/WAVE-BUOYS/REALTIME/") - + s3_ls(BUCKET_RAW_DEFAULT, "Deakin_University/WAVE-BUOYS/REALTIME") - ) - - dataset_config = load_dataset_config( - str( - importlib.resources.path( - "aodn_cloud_optimised.config.dataset", "ardc_wave_nrt.json" - ) - ) - ) - - cloud_optimised_creation_loop(nc_obj_ls, dataset_config=dataset_config) - + command = [ + "generic_cloud_optimised_creation", + "--paths", + "Department_of_Transport-Western_Australia/WAVE-BUOYS/REALTIME/", + "Bureau_of_Meteorology/WAVE-BUOYS/REALTIME/", + "Deakin_University/WAVE-BUOYS/REALTIME", + "--dataset-config", + "ardc_wave_nrt.json", + "--clear-existing-data", + "--cluster-mode", + "remote", + ] -if __name__ == "__main__": - main() + # Run the command + subprocess.run(command, check=True) diff --git a/aodn_cloud_optimised/bin/argo_to_parquet.py b/aodn_cloud_optimised/bin/argo_to_parquet.py index 6ac83ed..6937479 100755 --- a/aodn_cloud_optimised/bin/argo_to_parquet.py +++ b/aodn_cloud_optimised/bin/argo_to_parquet.py @@ -1,54 +1,31 @@ #!/usr/bin/env python3 -import importlib.resources - -from aodn_cloud_optimised.lib.ArgoHandler import ArgoHandler -from aodn_cloud_optimised.lib.CommonHandler import ( - cloud_optimised_creation_loop, - cloud_optimised_creation, -) -from aodn_cloud_optimised.lib.config import ( - 
load_variable_from_config, - load_dataset_config, -) -from aodn_cloud_optimised.lib.s3Tools import s3_ls +import subprocess def main(): - - dataset_config = load_dataset_config( - str( - importlib.resources.path( - "aodn_cloud_optimised.config.dataset", "argo_core.json" - ) - ) - ) - - # test file with timestamp issues and deletion of previous parquet objects - # cloud_optimised_creation('IMOS/Argo/dac/incois/2902093/2902093_prof.nc', - # dataset_config=dataset_config, - # handler_class=ArgoHandler, - # force_old_pq_del=True) - - # Lots of ram usage - # cloud_optimised_creation('IMOS/Argo/dac/coriolis/3902120/3902120_prof.nc', - # dataset_config=dataset_config, - # handler_class=ArgoHandler, - # force_old_pq_del=True) - - BUCKET_RAW_DEFAULT = load_variable_from_config("BUCKET_RAW_DEFAULT") - - # organisations = ["kordi", "csiro", "bodc", "csio", "incois", "jma", "coriolis", "aoml", "nmdis", "meds", "kma"] - organisations = ["nmdis", "meds", "kma"] - - for org in organisations: - argo_core_ls = s3_ls( - BUCKET_RAW_DEFAULT, f"IMOS/Argo/dac/{org}", suffix="_prof.nc" - ) - - cloud_optimised_creation_loop( - argo_core_ls, dataset_config=dataset_config, handler_class=ArgoHandler - ) - - -if __name__ == "__main__": - main() + command = [ + "generic_cloud_optimised_creation", + "--paths", + "IMOS/Argo/dac/kordi", + "IMOS/Argo/dac/csiro", + "IMOS/Argo/dac/bodc", + "IMOS/Argo/dac/csio", + "IMOS/Argo/dac/incois", + "IMOS/Argo/dac/jma", + "IMOS/Argo/dac/coriolis", + "IMOS/Argo/dac/aoml", + "IMOS/Argo/dac/nmdis", + "IMOS/Argo/dac/meds", + "IMOS/Argo/dac/kma", + "--suffix", + "_prof.nc", + "--dataset-config", + "argo_core.json", + "--clear-existing-data", + "--force-previous-parquet-deletion", + "--cluster-mode", + "remote", + ] + + # Run the command + subprocess.run(command, check=True) diff --git a/aodn_cloud_optimised/bin/generic_cloud_optimised_creation.py b/aodn_cloud_optimised/bin/generic_cloud_optimised_creation.py new file mode 100644 index 0000000..2f0e75c --- /dev/null 
+++ b/aodn_cloud_optimised/bin/generic_cloud_optimised_creation.py @@ -0,0 +1,116 @@ +#!/usr/bin/env python3 +""" +Script to process S3 paths and create cloud-optimized datasets. + +This script allows you to specify S3 paths and various options to process +datasets and create cloud-optimised versions. It provides filtering options +and supports different cluster modes. + +Usage Examples: + generic_cloud_optimised_creation --paths 'IMOS/ANMN/NSW' 'IMOS/ANMN/PA' \ + --filters '_hourly-timeseries_' 'FV02' --dataset-config 'anmn_hourly_timeseries.json' \ + --clear-existing-data --cluster-mode 'remote' + + generic_cloud_optimised_creation --paths 'IMOS/ANMN/NSW' 'IMOS/ANMN/QLD' \ + --dataset-config 'anmn_ctd_ts_fv01.json' + + generic_cloud_optimised_creation --paths 'IMOS/ACORN/gridded_1h-avg-current-map_QC/TURQ/2024' \ + --dataset-config 'acorn_gridded_qc_turq.json' --clear-existing-data --cluster-mode 'remote' + +""" + +import argparse +import importlib.resources +from aodn_cloud_optimised.lib.CommonHandler import cloud_optimised_creation +from aodn_cloud_optimised.lib.config import ( + load_variable_from_config, + load_dataset_config, +) +from aodn_cloud_optimised.lib.s3Tools import s3_ls + + +def main(): + parser = argparse.ArgumentParser( + description="Process S3 paths and create cloud-optimized datasets.", + epilog="Examples:\n" + " generic_cloud_optimised_creation --paths 'IMOS/ANMN/NSW' 'IMOS/ANMN/PA' --filters '_hourly-timeseries_' 'FV02' --dataset-config 'anmn_hourly_timeseries.json' --clear-existing-data --cluster-mode 'remote'\n" + " generic_cloud_optimised_creation --paths 'IMOS/ANMN/NSW' 'IMOS/ANMN/QLD' --dataset-config 'anmn_ctd_ts_fv01.json'\n" + " generic_cloud_optimised_creation --paths 'IMOS/ACORN/gridded_1h-avg-current-map_QC/TURQ/2024' --dataset-config 'acorn_gridded_qc_turq.json' --clear-existing-data --cluster-mode 'remote'\n", + formatter_class=argparse.RawTextHelpFormatter, + ) + + parser.add_argument( + "--paths", + nargs="+", + required=True, 
+ help="List of S3 paths to process. Example: 'IMOS/ANMN/NSW' 'IMOS/ANMN/PA'", + ) + parser.add_argument( + "--filters", + nargs="*", + default=[], + help="Optional filter strings to apply on the S3 paths. Example: '_hourly-timeseries_' 'FV02'", + ) + parser.add_argument( + "--suffix", + default=".nc", + help="Optional suffix used by s3_ls to filter S3 objects. Default is .nc. Example: '.nc'", + ) + parser.add_argument( + "--dataset-config", + required=True, + help="Path to the dataset config JSON file. Example: 'anmn_hourly_timeseries.json'", + ) + parser.add_argument( + "--clear-existing-data", + action="store_true", + help="Flag to clear existing data. Default is False.", + ) + parser.add_argument( + "--force-previous-parquet-deletion", + action="store_true", + help="Flag to force the search of previous equivalent parquet file created. Much slower. Default is False.", + ) + parser.add_argument( + "--cluster-mode", + default="local", + choices=["local", "remote"], + help="Cluster mode to use. Options: 'local' or 'remote'. 
Default is 'local'.", + ) + + args = parser.parse_args() + + BUCKET_RAW_DEFAULT = load_variable_from_config("BUCKET_RAW_DEFAULT") + + # Gather S3 paths + nc_obj_ls = [] + for path in args.paths: + nc_obj_ls += s3_ls(BUCKET_RAW_DEFAULT, path, suffix=args.suffix) + + # Apply filters + for filter_str in args.filters: + nc_obj_ls = [s for s in nc_obj_ls if filter_str in s] + + # Load dataset config + dataset_config_path = args.dataset_config + dataset_config = load_dataset_config( + str( + importlib.resources.path( + "aodn_cloud_optimised.config.dataset", dataset_config_path + ) + ) + ) + + # Call cloud_optimised_creation + cloud_optimised_creation( + nc_obj_ls, + dataset_config=dataset_config, + handler_class=None, + clear_existing_data=args.clear_existing_data, + force_previous_parquet_deletion=args.force_previous_parquet_deletion, + cluster_mode=args.cluster_mode, + ) + + +if __name__ == "__main__": + main() diff --git a/aodn_cloud_optimised/bin/gsla_nrt_to_zarr.py b/aodn_cloud_optimised/bin/gsla_nrt_to_zarr.py old mode 100644 new mode 100755 index 628f1dd..da9ab78 --- a/aodn_cloud_optimised/bin/gsla_nrt_to_zarr.py +++ b/aodn_cloud_optimised/bin/gsla_nrt_to_zarr.py @@ -1,40 +1,18 @@ #!/usr/bin/env python3 -import importlib.resources - -from aodn_cloud_optimised.lib.GenericZarrHandler import GenericHandler -from aodn_cloud_optimised.lib.CommonHandler import cloud_optimised_creation_loop -from aodn_cloud_optimised.lib.config import ( - load_variable_from_config, - load_dataset_config, -) -from aodn_cloud_optimised.lib.s3Tools import s3_ls +import subprocess def main(): - BUCKET_RAW_DEFAULT = load_variable_from_config("BUCKET_RAW_DEFAULT") - nc_obj_ls = s3_ls(BUCKET_RAW_DEFAULT, "IMOS/OceanCurrent/GSLA/NRT/2024") - - dataset_config = load_dataset_config( - str( - importlib.resources.path( - "aodn_cloud_optimised.config.dataset", "gsla_nrt.json" - ) - ) - ) - # cloud_optimised_creation_loop([nc_obj_ls[0]], - # dataset_config=dataset_config, - # reprocess=True - # ) - - 
# cloud_optimised_creation_loop(nc_obj_ls[1:], - # dataset_config=dataset_config, - # ) - - # rechunking - GenericHandler( - input_object_key=nc_obj_ls[0], - dataset_config=dataset_config, - ).rechunk() + command = [ + "generic_cloud_optimised_creation", + "--paths", + "IMOS/OceanCurrent/GSLA/NRT/2024", + "--dataset-config", + "gsla_nrt.json", + "--clear-existing-data", + "--cluster-mode", + "remote", + ] - if __name__ == "__main__": - main() + # Run the command + subprocess.run(command, check=True) diff --git a/aodn_cloud_optimised/bin/soop_xbt_nrt_to_parquet.py b/aodn_cloud_optimised/bin/soop_xbt_nrt_to_parquet.py index fe2acb0..12502e4 100755 --- a/aodn_cloud_optimised/bin/soop_xbt_nrt_to_parquet.py +++ b/aodn_cloud_optimised/bin/soop_xbt_nrt_to_parquet.py @@ -1,27 +1,18 @@ #!/usr/bin/env python3 -import importlib.resources - -from aodn_cloud_optimised.lib.CommonHandler import cloud_optimised_creation_loop -from aodn_cloud_optimised.lib.config import ( - load_variable_from_config, - load_dataset_config, -) -from aodn_cloud_optimised.lib.s3Tools import s3_ls +import subprocess def main(): - BUCKET_RAW_DEFAULT = load_variable_from_config("BUCKET_RAW_DEFAULT") - nc_obj_ls = s3_ls(BUCKET_RAW_DEFAULT, "IMOS/SOOP/SOOP-XBT/REALTIME/") - - dataset_config = load_dataset_config( - str( - importlib.resources.path( - "aodn_cloud_optimised.config.dataset", "soop_xbt_nrt.json" - ) - ) - ) - cloud_optimised_creation_loop(nc_obj_ls, dataset_config=dataset_config) - + command = [ + "generic_cloud_optimised_creation", + "--paths", + "IMOS/SOOP/SOOP-XBT/REALTIME/", + "--dataset-config", + "soop_xbt_nrt.json", + "--clear-existing-data", + "--cluster-mode", + "remote", + ] -if __name__ == "__main__": - main() + # Run the command + subprocess.run(command, check=True) diff --git a/aodn_cloud_optimised/bin/srs_l3s_1d_dn_to_zarr.py b/aodn_cloud_optimised/bin/srs_l3s_1d_dn_to_zarr.py old mode 100644 new mode 100755 index 84ad81a..a6c1852 --- 
a/aodn_cloud_optimised/bin/srs_l3s_1d_dn_to_zarr.py +++ b/aodn_cloud_optimised/bin/srs_l3s_1d_dn_to_zarr.py @@ -1,34 +1,23 @@ #!/usr/bin/env python3 -import importlib.resources - -from aodn_cloud_optimised.lib.CommonHandler import cloud_optimised_creation_loop -from aodn_cloud_optimised.lib.config import ( - load_variable_from_config, - load_dataset_config, -) -from aodn_cloud_optimised.lib.s3Tools import s3_ls +import subprocess def main(): - BUCKET_RAW_DEFAULT = load_variable_from_config("BUCKET_RAW_DEFAULT") - nc_obj_ls = s3_ls(BUCKET_RAW_DEFAULT, "IMOS/SRS/SST/ghrsst/L3S-1d/dn/2024") - - dataset_config = load_dataset_config( - str( - importlib.resources.path( - "aodn_cloud_optimised.config.dataset", "srs_l3s_1d_dn.json" - ) - ) - ) - cloud_optimised_creation_loop( - [nc_obj_ls[0]], dataset_config=dataset_config, reprocess=True - ) - - cloud_optimised_creation_loop( - nc_obj_ls[1:], - dataset_config=dataset_config, - ) - + command = [ + "generic_cloud_optimised_creation", + "--paths", + "IMOS/SRS/SST/ghrsst/L3S-1d/dn/2019", + "IMOS/SRS/SST/ghrsst/L3S-1d/dn/2020", + "IMOS/SRS/SST/ghrsst/L3S-1d/dn/2021", + "IMOS/SRS/SST/ghrsst/L3S-1d/dn/2022", + "IMOS/SRS/SST/ghrsst/L3S-1d/dn/2023", + "IMOS/SRS/SST/ghrsst/L3S-1d/dn/2024", + "--dataset-config", + "srs_l3s_1d_dn.json", + "--clear-existing-data", + "--cluster-mode", + "remote", + ] -if __name__ == "__main__": - main() + # Run the command + subprocess.run(command, check=True) diff --git a/aodn_cloud_optimised/bin/srs_l3s_3d_dn_to_zarr.py b/aodn_cloud_optimised/bin/srs_l3s_3d_dn_to_zarr.py new file mode 100755 index 0000000..b20b54a --- /dev/null +++ b/aodn_cloud_optimised/bin/srs_l3s_3d_dn_to_zarr.py @@ -0,0 +1,18 @@ +#!/usr/bin/env python3 +import subprocess + + +def main(): + command = [ + "generic_cloud_optimised_creation", + "--paths", + "IMOS/SRS/SST/ghrsst/L3S-3d/dn/2022", + "--dataset-config", + "srs_l3s_3d_dn.json", + "--clear-existing-data", + "--cluster-mode", + "remote", + ] + + # Run the command + 
subprocess.run(command, check=True) diff --git a/aodn_cloud_optimised/bin/srs_oc_ljco_to_parquet.py b/aodn_cloud_optimised/bin/srs_oc_ljco_to_parquet.py index d036b22..333d533 100755 --- a/aodn_cloud_optimised/bin/srs_oc_ljco_to_parquet.py +++ b/aodn_cloud_optimised/bin/srs_oc_ljco_to_parquet.py @@ -1,27 +1,18 @@ #!/usr/bin/env python3 -import importlib.resources - -from aodn_cloud_optimised.lib.CommonHandler import cloud_optimised_creation_loop -from aodn_cloud_optimised.lib.config import ( - load_variable_from_config, - load_dataset_config, -) -from aodn_cloud_optimised.lib.s3Tools import s3_ls +import subprocess def main(): - BUCKET_RAW_DEFAULT = load_variable_from_config("BUCKET_RAW_DEFAULT") - nc_obj_ls = s3_ls(BUCKET_RAW_DEFAULT, "IMOS/SRS/OC/LJCO/WQM-daily/") - - dataset_config = load_dataset_config( - str( - importlib.resources.path( - "aodn_cloud_optimised.config.dataset", "srs_oc_ljco_wqm_daily.json" - ) - ) - ) - cloud_optimised_creation_loop(nc_obj_ls, dataset_config=dataset_config) - + command = [ + "generic_cloud_optimised_creation", + "--paths", + "IMOS/SRS/OC/LJCO/WQM-daily/", + "--dataset-config", + "srs_oc_ljco_wqm_daily.json", + "--clear-existing-data", + "--cluster-mode", + "remote", + ] -if __name__ == "__main__": - main() + # Run the command + subprocess.run(command, check=True) diff --git a/aodn_cloud_optimised/config/common.json b/aodn_cloud_optimised/config/common.json index 3c92500..8f17b53 100644 --- a/aodn_cloud_optimised/config/common.json +++ b/aodn_cloud_optimised/config/common.json @@ -1,7 +1,7 @@ { "BUCKET_RAW_DEFAULT": "imos-data", "BUCKET_OPTIMISED_DEFAULT": "imos-data-lab-optimised", - "ROOT_PREFIX_CLOUD_OPTIMISED_PATH": "parquet/loz_test", + "ROOT_PREFIX_CLOUD_OPTIMISED_PATH": "cloud_optimised/cluster_testing", "BUCKET_INTEGRATION_TESTING_RAW_DEFAULT": "imos-data", "BUCKET_INTEGRATION_TESTING_OPTIMISED_DEFAULT": "imos-data-lab-optimised", "ROOT_PREFIX_CLOUD_OPTIMISED_INTEGRATION_TESTING_PATH": 
"cloud_optimised/integration_testing" diff --git a/aodn_cloud_optimised/config/dataset/aatams_acoustic_tagging.json b/aodn_cloud_optimised/config/dataset/aatams_acoustic_tagging.json index ed4dc2d..da9b102 100644 --- a/aodn_cloud_optimised/config/dataset/aatams_acoustic_tagging.json +++ b/aodn_cloud_optimised/config/dataset/aatams_acoustic_tagging.json @@ -2,6 +2,16 @@ "dataset_name": "aatams_acoustic_tagging", "logger_name": "aatams_acoustic_tagging", "cloud_optimised_format": "parquet", + "cluster_options" : { + "n_workers": [4, 20], + "scheduler_vm_types": "t3.small", + "worker_vm_types": "t3.large", + "allow_ingress_from": "me", + "compute_purchase_option": "spot_with_fallback", + "worker_options": { + "nthreads": 8, + "memory_limit": "32GB" } + }, "metadata_uuid": "4a97bd11-e821-4682-8b20-cb69201f3223", "gattrs_to_variables": [], "partition_keys": ["transmitter_id", "timestamp", "polygon"], diff --git a/aodn_cloud_optimised/config/dataset/acorn_gridded_qc_main.json b/aodn_cloud_optimised/config/dataset/acorn_gridded_qc_main.json new file mode 100644 index 0000000..8ccc482 --- /dev/null +++ b/aodn_cloud_optimised/config/dataset/acorn_gridded_qc_main.json @@ -0,0 +1,136 @@ +{ + "dataset_name": "acorn_gridded_qc", + "logger_name": "acorn_gridded_qc", + "cloud_optimised_format": "zarr", + "cluster_options" : { + "n_workers": [2, 8], + "scheduler_vm_types": "t3.small", + "worker_vm_types": "t3.medium", + "allow_ingress_from": "me", + "compute_purchase_option": "spot_with_fallback", + "worker_options": { + "nthreads": 8, + "memory_limit": "32GB" } + }, + "cluster_config" : { + "n_workers": [0, 6], + "scheduler_vm_types": "t3.medium" + }, + "metadata_uuid": "", + "dimensions": { + "time": {"name": "TIME", + "chunk": 1500, + "rechunk": true}, + "latitude": {"name": "J", + "chunk": 60}, + "longitude": {"name": "I", + "chunk": 59} + }, + "var_template_shape": "UCUR", + "vars_to_drop_no_common_dimension": ["I", "J", "LATITUDE", "LONGITUDE", "GDOP"], + "schema": { + 
"TIME": {"type": "datetime64[ns]"}, + "I": {"type": "int32"}, + "J": {"type": "int32"}, + "LATITUDE": {"type": "float64"}, + "LONGITUDE": {"type": "float64"}, + "GDOP": {"type": "float32"}, + "UCUR": {"type": "float32"}, + "VCUR": {"type": "float32"}, + "UCUR_sd": {"type": "float32"}, + "VCUR_sd": {"type": "float32"}, + "NOBS1": {"type": "float32"}, + "NOBS2": {"type": "float32"}, + "UCUR_quality_control": {"type": "float32"}, + "VCUR_quality_control": {"type": "float32"} + }, + "dataset_gattrs": { + "title": "Temperature logger" + }, + "aws_opendata_registry": { + "Name": "", + "Description": "", + "Documentation": "", + "Contact": "", + "ManagedBy": "", + "UpdateFrequency": "", + "Tags": [], + "License": "", + "Resources": [ + { + "Description": "", + "ARN": "", + "Region": "", + "Type": "", + "Explore": [] + }, + { + "Description": "", + "ARN": "", + "Region": "", + "Type": "" + }, + { + "Description": "", + "ARN": "", + "Region": "", + "Type": "" + }, + { + "Description": "", + "ARN": "", + "Region": "", + "Type": "" + } + ], + "DataAtWork": { + "Tutorials": [ + { + "Title": "", + "URL": "", + "Services": "", + "AuthorName": "", + "AuthorURL": "" + }, + { + "Title": "", + "URL": "", + "AuthorName": "", + "AuthorURL": "" + }, + { + "Title": "", + "URL": "", + "AuthorName": "", + "AuthorURL": "" + } + ], + "Tools & Applications": [ + { + "Title": "", + "URL": "", + "AuthorName": "", + "AuthorURL": "" + }, + { + "Title": "", + "URL": "", + "AuthorName": "", + "AuthorURL": "" + } + ], + "Publications": [ + { + "Title": "", + "URL": "", + "AuthorName": "" + }, + { + "Title": "", + "URL": "", + "AuthorName": "" + } + ] + } + } +} diff --git a/aodn_cloud_optimised/config/dataset/acorn_gridded_qc_turq.json b/aodn_cloud_optimised/config/dataset/acorn_gridded_qc_turq.json index 249f5b3..bd2c5f1 100644 --- a/aodn_cloud_optimised/config/dataset/acorn_gridded_qc_turq.json +++ b/aodn_cloud_optimised/config/dataset/acorn_gridded_qc_turq.json @@ -1,122 +1,6 @@ { 
"dataset_name": "acorn_gridded_qc_turq", + "parent_config": "acorn_gridded_qc_main.json", "logger_name": "acorn_gridded_qc_turq", - "cloud_optimised_format": "zarr", - "metadata_uuid": "", - "dimensions": { - "time": {"name": "TIME", - "chunk": 1500, - "rechunk": true}, - "latitude": {"name": "J", - "chunk": 60}, - "longitude": {"name": "I", - "chunk": 59} - }, - "var_template_shape": "UCUR", - "vars_to_drop_no_common_dimension": ["I", "J", "LATITUDE", "LONGITUDE", "GDOP"], - "schema": { - "TIME": {"type": "datetime64[ns]"}, - "I": {"type": "int32"}, - "J": {"type": "int32"}, - "LATITUDE": {"type": "float64"}, - "LONGITUDE": {"type": "float64"}, - "GDOP": {"type": "float32"}, - "UCUR": {"type": "float32"}, - "VCUR": {"type": "float32"}, - "UCUR_sd": {"type": "float32"}, - "VCUR_sd": {"type": "float32"}, - "NOBS1": {"type": "float32"}, - "NOBS2": {"type": "float32"}, - "UCUR_quality_control": {"type": "float32"}, - "VCUR_quality_control": {"type": "float32"} - }, - "dataset_gattrs": { - "title": "Temperature logger" - }, - "aws_opendata_registry": { - "Name": "", - "Description": "", - "Documentation": "", - "Contact": "", - "ManagedBy": "", - "UpdateFrequency": "", - "Tags": [], - "License": "", - "Resources": [ - { - "Description": "", - "ARN": "", - "Region": "", - "Type": "", - "Explore": [] - }, - { - "Description": "", - "ARN": "", - "Region": "", - "Type": "" - }, - { - "Description": "", - "ARN": "", - "Region": "", - "Type": "" - }, - { - "Description": "", - "ARN": "", - "Region": "", - "Type": "" - } - ], - "DataAtWork": { - "Tutorials": [ - { - "Title": "", - "URL": "", - "Services": "", - "AuthorName": "", - "AuthorURL": "" - }, - { - "Title": "", - "URL": "", - "AuthorName": "", - "AuthorURL": "" - }, - { - "Title": "", - "URL": "", - "AuthorName": "", - "AuthorURL": "" - } - ], - "Tools & Applications": [ - { - "Title": "", - "URL": "", - "AuthorName": "", - "AuthorURL": "" - }, - { - "Title": "", - "URL": "", - "AuthorName": "", - "AuthorURL": "" - } 
- ], - "Publications": [ - { - "Title": "", - "URL": "", - "AuthorName": "" - }, - { - "Title": "", - "URL": "", - "AuthorName": "" - } - ] - } - } + "metadata_uuid": "" } diff --git a/aodn_cloud_optimised/config/dataset/anfog_slocum_glider.json b/aodn_cloud_optimised/config/dataset/anfog_slocum_glider.json index 65b5839..28e2af8 100644 --- a/aodn_cloud_optimised/config/dataset/anfog_slocum_glider.json +++ b/aodn_cloud_optimised/config/dataset/anfog_slocum_glider.json @@ -2,6 +2,16 @@ "dataset_name": "anfog_slocum_glider", "logger_name": "anfog_slocum_glider", "cloud_optimised_format": "parquet", + "cluster_options" : { + "n_workers": [4, 20], + "scheduler_vm_types": "t3.medium", + "worker_vm_types": "t3.xlarge", + "allow_ingress_from": "me", + "compute_purchase_option": "spot_with_fallback", + "worker_options": { + "nthreads": 8, + "memory_limit": "32GB" } + }, "metadata_uuid": "a681fdba-c6d9-44ab-90b9-113b0ed03536", "gattrs_to_variables": [ "deployment_code" diff --git a/aodn_cloud_optimised/config/dataset/anmn_ctd_ts_fv01.json b/aodn_cloud_optimised/config/dataset/anmn_ctd_ts_fv01.json index 4ce5b3b..7f80e5d 100644 --- a/aodn_cloud_optimised/config/dataset/anmn_ctd_ts_fv01.json +++ b/aodn_cloud_optimised/config/dataset/anmn_ctd_ts_fv01.json @@ -2,6 +2,16 @@ "dataset_name": "anmn_ctd_ts_fv01", "logger_name": "anmn_ctd_ts_fv01", "cloud_optimised_format": "parquet", + "cluster_options" : { + "n_workers": [8, 20], + "scheduler_vm_types": "t3.small", + "worker_vm_types": "t3.large", + "allow_ingress_from": "me", + "compute_purchase_option": "spot_with_fallback", + "worker_options": { + "nthreads": 8, + "memory_limit": "32GB" } + }, "metadata_uuid": "7b901002-b1dc-46c3-89f2-b4951cedca48", "gattrs_to_variables": [ "site_code" diff --git a/aodn_cloud_optimised/config/dataset/anmn_hourly_timeseries.json b/aodn_cloud_optimised/config/dataset/anmn_hourly_timeseries.json index 540184d..259b7be 100644 --- a/aodn_cloud_optimised/config/dataset/anmn_hourly_timeseries.json +++ 
b/aodn_cloud_optimised/config/dataset/anmn_hourly_timeseries.json @@ -1,7 +1,18 @@ { "dataset_name": "anmn_hourly_timeseries", "logger_name": "anmn_hourly_timeseries", + "handler_class": "AnmnHourlyTsHandler", "cloud_optimised_format": "parquet", + "cluster_options" : { + "n_workers": [8, 20], + "scheduler_vm_types": "t3.small", + "worker_vm_types": "t3.large", + "allow_ingress_from": "me", + "compute_purchase_option": "spot_with_fallback", + "worker_options": { + "nthreads": 8, + "memory_limit": "64GB" } + }, "metadata_uuid": "7b901002-b1dc-46c3-89f2-b4951cedca48", "gattrs_to_variables": [ "site_code" @@ -437,6 +448,45 @@ "long_name": "std data value in the bin, after rejection of flagged data", "cell_methods": "TIME:std" }, + "DOX1_count": { + "type": "float", + "standard_name": "mole_concentration_of_dissolved_molecular_oxygen_in_sea_water number_of_observations", + "units": "1", + "long_name": "count data value in the bin, after rejection of flagged data", + "cell_methods": "TIME:count" + }, + "DOX1_max": { + "type": "float", + "units": "umol l-1", + "standard_name": "mole_concentration_of_dissolved_molecular_oxygen_in_sea_water", + "long_name": "max data value in the bin, after rejection of flagged data", + "cell_methods": "TIME:max" + }, + "DOX1_min": { + "type": "float", + "units": "umol l-1", + "standard_name": "mole_concentration_of_dissolved_molecular_oxygen_in_sea_water", + "long_name": "min data value in the bin, after rejection of flagged data", + "cell_methods": "TIME:min" + }, + "DOX1_std": { + "type": "float", + "units": "umol l-1", + "standard_name": "mole_concentration_of_dissolved_molecular_oxygen_in_sea_water", + "long_name": "std data value in the bin, after rejection of flagged data", + "cell_methods": "TIME:std" + }, + "DOX2": { + "type": "float", + "ancillary_variables": "DOX2_min DOX2_max DOX2_std DOX2_count", + "comment": "Originally expressed in ml/l, assuming 1ml/l = 44.660umol/l and using density computed from Temperature, Salinity and 
Pressure with the CSIRO SeaWater library (EOS-80) v1.1.", + "long_name": "mean moles_of_oxygen_per_unit_mass_in_sea_water", + "standard_name": "moles_of_oxygen_per_unit_mass_in_sea_water", + "units": "umol kg-1", + "valid_max": 1000.0, + "valid_min": 0.0, + "cell_methods": "TIME:mean (interval: 1 hr comment: time mid point)" + }, "timestamp": { "type": "int64" }, diff --git a/aodn_cloud_optimised/config/dataset/anmn_temperature_logger_ts_fv01.json b/aodn_cloud_optimised/config/dataset/anmn_temperature_logger_ts_fv01.json index bbb7136..e35cb28 100644 --- a/aodn_cloud_optimised/config/dataset/anmn_temperature_logger_ts_fv01.json +++ b/aodn_cloud_optimised/config/dataset/anmn_temperature_logger_ts_fv01.json @@ -2,6 +2,16 @@ "dataset_name": "anmn_temperature_logger_ts_fv01", "logger_name": "anmn_temperature_logger_ts_fv01", "cloud_optimised_format": "parquet", + "cluster_options" : { + "n_workers": [8, 20], + "scheduler_vm_types": "t3.small", + "worker_vm_types": "t3.large", + "allow_ingress_from": "me", + "compute_purchase_option": "spot_with_fallback", + "worker_options": { + "nthreads": 8, + "memory_limit": "32GB" } + }, "metadata_uuid": "7e13b5f3-4a70-4e31-9e95-335efa491c5c", "gattrs_to_variables": [ "site_code" diff --git a/aodn_cloud_optimised/config/dataset/ardc_wave_nrt.json b/aodn_cloud_optimised/config/dataset/ardc_wave_nrt.json index f988641..554d6cb 100644 --- a/aodn_cloud_optimised/config/dataset/ardc_wave_nrt.json +++ b/aodn_cloud_optimised/config/dataset/ardc_wave_nrt.json @@ -2,6 +2,16 @@ "dataset_name": "ardc_wave_nrt", "logger_name": "ardc_wave_nrt", "cloud_optimised_format": "parquet", + "cluster_options" : { + "n_workers": [8, 20], + "scheduler_vm_types": "t3.small", + "worker_vm_types": "t3.large", + "allow_ingress_from": "me", + "compute_purchase_option": "spot_with_fallback", + "worker_options": { + "nthreads": 8, + "memory_limit": "32GB" } + }, "metadata_uuid": "2807f3aa-4db0-4924-b64b-354ae8c10b58", "gattrs_to_variables": [ "site_name", diff 
--git a/aodn_cloud_optimised/config/dataset/argo_core.json b/aodn_cloud_optimised/config/dataset/argo_core.json index 887c446..5fc92fa 100644 --- a/aodn_cloud_optimised/config/dataset/argo_core.json +++ b/aodn_cloud_optimised/config/dataset/argo_core.json @@ -1,7 +1,18 @@ { "dataset_name": "argo_core", "logger_name": "argo_core", + "handler_class": "ArgoHandler", "cloud_optimised_format": "parquet", + "cluster_options" : { + "n_workers": [8, 20], + "scheduler_vm_types": "t3.small", + "worker_vm_types": "t3.large", + "allow_ingress_from": "me", + "compute_purchase_option": "spot_with_fallback", + "worker_options": { + "nthreads": 8, + "memory_limit": "32GB" } + }, "metadata_uuid": "4402cb50-e20a-44ee-93e6-4728259250d2", "gattrs_to_variables": [], "partition_keys": [ diff --git a/aodn_cloud_optimised/config/dataset/dataset_template.json b/aodn_cloud_optimised/config/dataset/dataset_template.json index 4310a35..f0738b2 100644 --- a/aodn_cloud_optimised/config/dataset/dataset_template.json +++ b/aodn_cloud_optimised/config/dataset/dataset_template.json @@ -4,6 +4,17 @@ "cloud_optimised_format": "parquet", + "cluster_options" : { + "n_workers": [8, 20], + "scheduler_vm_types": "t3.small", + "worker_vm_types": "t3.large", + "allow_ingress_from": "me", + "compute_purchase_option": "spot_with_fallback", + "worker_options": { + "nthreads": 8, + "memory_limit": "32GB" } + }, + // (Optional) The associated geonetwork metadata record uuid "metadata_uuid": "b12b3-123bb-iijww", diff --git a/aodn_cloud_optimised/config/dataset/gsla_nrt.json b/aodn_cloud_optimised/config/dataset/gsla_nrt.json index fd3d546..cf3e578 100644 --- a/aodn_cloud_optimised/config/dataset/gsla_nrt.json +++ b/aodn_cloud_optimised/config/dataset/gsla_nrt.json @@ -2,6 +2,16 @@ "dataset_name": "gsla_nrt", "logger_name": "gsla_nrt", "cloud_optimised_format": "zarr", + "cluster_options" : { + "n_workers": [2, 8], + "scheduler_vm_types": "t3.medium", + "worker_vm_types": "t3.large", + "allow_ingress_from": "me", 
+ "compute_purchase_option": "spot_with_fallback", + "worker_options": { + "nthreads": 8, + "memory_limit": "32GB" } + }, "metadata_uuid": "", "dimensions": { "time": { diff --git a/aodn_cloud_optimised/config/dataset/soop_xbt_nrt.json b/aodn_cloud_optimised/config/dataset/soop_xbt_nrt.json index 096b5a6..06aac5b 100644 --- a/aodn_cloud_optimised/config/dataset/soop_xbt_nrt.json +++ b/aodn_cloud_optimised/config/dataset/soop_xbt_nrt.json @@ -2,6 +2,16 @@ "dataset_name": "soop_xbt_nrt", "logger_name": "soop_xbt_nrt", "cloud_optimised_format": "parquet", + "cluster_options" : { + "n_workers": [8, 20], + "scheduler_vm_types": "t3.small", + "worker_vm_types": "t3.large", + "allow_ingress_from": "me", + "compute_purchase_option": "spot_with_fallback", + "worker_options": { + "nthreads": 8, + "memory_limit": "32GB" } + }, "metadata_uuid": "35234913-aa3c-48ec-b9a4-77f822f66ef8", "gattrs_to_variables": [ "XBT_line", diff --git a/aodn_cloud_optimised/config/dataset/srs_ghrsst_main.json b/aodn_cloud_optimised/config/dataset/srs_ghrsst_main.json new file mode 100644 index 0000000..1fdfed2 --- /dev/null +++ b/aodn_cloud_optimised/config/dataset/srs_ghrsst_main.json @@ -0,0 +1,138 @@ +{ + "dataset_name": "srs_ghrsst", + "logger_name": "srs_ghrsst", + "cloud_optimised_format": "zarr", + "cluster_options" : { + "n_workers": [2, 8], + "scheduler_vm_types": "t3.small", + "worker_vm_types": "t3.medium", + "allow_ingress_from": "me", + "compute_purchase_option": "spot_with_fallback", + "worker_options": { + "nthreads": 8, + "memory_limit": "32GB" } + }, + "metadata_uuid": "", + "dimensions": { + "time": {"name": "time", + "chunk": 5, + "rechunk": true}, + "latitude": {"name": "lat", + "chunk": 1000}, + "longitude": {"name": "lon", + "chunk": 1000} + }, + "var_template_shape": "sea_surface_temperature", + "vars_to_drop_no_common_dimension": ["lat", "lon"], + "schema": { + "lon": {"type": "float32"}, + "lat": {"type": "float32"}, + "time": {"type": "datetime64[ns]"}, + 
"sea_surface_temperature": {"type": "float32"}, + "sea_surface_temperature_day_night": {"type": "float32", "drop_vars": true}, + "sst_dtime": {"type": "float64"}, + "dt_analysis": {"type": "float32"}, + "wind_speed": {"type": "float32", "drop_vars": true}, + "wind_speed_dtime_from_sst": {"type": "float32", "drop_vars": true}, + "sea_ice_fraction": {"type": "float32", "drop_vars": true}, + "sea_ice_fraction_dtime_from_sst": {"type": "float32", "drop_vars": true}, + "satellite_zenith_angle": {"type": "float32"}, + "l2p_flags": {"type": "float32"}, + "quality_level": {"type": "float32"}, + "sses_bias": {"type": "float32"}, + "sses_standard_deviation": {"type": "float32"}, + "sses_count": {"type": "float32"}, + "sst_count": {"type": "float32"}, + "sst_mean": {"type": "float32"}, + "sst_standard_deviation": {"type": "float32"} + }, + "dataset_gattrs": { + "title": "Temperature logger" + }, + "aws_opendata_registry": { + "Name": "", + "Description": "", + "Documentation": "", + "Contact": "", + "ManagedBy": "", + "UpdateFrequency": "", + "Tags": [], + "License": "", + "Resources": [ + { + "Description": "", + "ARN": "", + "Region": "", + "Type": "", + "Explore": [] + }, + { + "Description": "", + "ARN": "", + "Region": "", + "Type": "" + }, + { + "Description": "", + "ARN": "", + "Region": "", + "Type": "" + }, + { + "Description": "", + "ARN": "", + "Region": "", + "Type": "" + } + ], + "DataAtWork": { + "Tutorials": [ + { + "Title": "", + "URL": "", + "Services": "", + "AuthorName": "", + "AuthorURL": "" + }, + { + "Title": "", + "URL": "", + "AuthorName": "", + "AuthorURL": "" + }, + { + "Title": "", + "URL": "", + "AuthorName": "", + "AuthorURL": "" + } + ], + "Tools & Applications": [ + { + "Title": "", + "URL": "", + "AuthorName": "", + "AuthorURL": "" + }, + { + "Title": "", + "URL": "", + "AuthorName": "", + "AuthorURL": "" + } + ], + "Publications": [ + { + "Title": "", + "URL": "", + "AuthorName": "" + }, + { + "Title": "", + "URL": "", + "AuthorName": "" + } + 
] + } + } +} diff --git a/aodn_cloud_optimised/config/dataset/srs_l3s_1d_dn.json b/aodn_cloud_optimised/config/dataset/srs_l3s_1d_dn.json index 89e7f44..d9f3d2d 100644 --- a/aodn_cloud_optimised/config/dataset/srs_l3s_1d_dn.json +++ b/aodn_cloud_optimised/config/dataset/srs_l3s_1d_dn.json @@ -1,128 +1,6 @@ { - "dataset_name": "srs_l3s_1d_dn", - "logger_name": "srs_l3s_1d_dn", - "cloud_optimised_format": "zarr", - "metadata_uuid": "", - "dimensions": { - "time": {"name": "time", - "chunk": 10, - "rechunk": true}, - "latitude": {"name": "lat", - "chunk": 1000}, - "longitude": {"name": "lon", - "chunk": 1000} - }, - "var_template_shape": "sea_surface_temperature", - "vars_to_drop_no_common_dimension": ["lat", "lon"], - "schema": { - "lon": {"type": "float32"}, - "lat": {"type": "float32"}, - "time": {"type": "datetime64[ns]"}, - "sea_surface_temperature": {"type": "float32"}, - "sea_surface_temperature_day_night": {"type": "float32"}, - "sst_dtime": {"type": "float64"}, - "dt_analysis": {"type": "float32"}, - "wind_speed": {"type": "float32"}, - "wind_speed_dtime_from_sst": {"type": "float32"}, - "sea_ice_fraction": {"type": "float32"}, - "sea_ice_fraction_dtime_from_sst": {"type": "float32"}, - "satellite_zenith_angle": {"type": "float32"}, - "l2p_flags": {"type": "float32"}, - "quality_level": {"type": "float32"}, - "sses_bias": {"type": "float32"}, - "sses_standard_deviation": {"type": "float32"}, - "sses_count": {"type": "float32"}, - "sst_count": {"type": "float32"}, - "sst_mean": {"type": "float32"}, - "sst_standard_deviation": {"type": "float32"} - }, - "dataset_gattrs": { - "title": "Temperature logger" - }, - "aws_opendata_registry": { - "Name": "", - "Description": "", - "Documentation": "", - "Contact": "", - "ManagedBy": "", - "UpdateFrequency": "", - "Tags": [], - "License": "", - "Resources": [ - { - "Description": "", - "ARN": "", - "Region": "", - "Type": "", - "Explore": [] - }, - { - "Description": "", - "ARN": "", - "Region": "", - "Type": "" - }, - 
{ - "Description": "", - "ARN": "", - "Region": "", - "Type": "" - }, - { - "Description": "", - "ARN": "", - "Region": "", - "Type": "" - } - ], - "DataAtWork": { - "Tutorials": [ - { - "Title": "", - "URL": "", - "Services": "", - "AuthorName": "", - "AuthorURL": "" - }, - { - "Title": "", - "URL": "", - "AuthorName": "", - "AuthorURL": "" - }, - { - "Title": "", - "URL": "", - "AuthorName": "", - "AuthorURL": "" - } - ], - "Tools & Applications": [ - { - "Title": "", - "URL": "", - "AuthorName": "", - "AuthorURL": "" - }, - { - "Title": "", - "URL": "", - "AuthorName": "", - "AuthorURL": "" - } - ], - "Publications": [ - { - "Title": "", - "URL": "", - "AuthorName": "" - }, - { - "Title": "", - "URL": "", - "AuthorName": "" - } - ] - } - } + "dataset_name": "srs_l3s_1d_dn", + "logger_name": "srs_l3s_1d_dn", + "parent_config": "srs_ghrsst_main.json", + "metadata_uuid": "" } diff --git a/aodn_cloud_optimised/config/dataset/srs_l3s_3d_dn.json b/aodn_cloud_optimised/config/dataset/srs_l3s_3d_dn.json new file mode 100644 index 0000000..b4a70fc --- /dev/null +++ b/aodn_cloud_optimised/config/dataset/srs_l3s_3d_dn.json @@ -0,0 +1,6 @@ +{ + "dataset_name": "srs_l3s_3d_dn", + "logger_name": "srs_l3s_3d_dn", + "parent_config": "srs_ghrsst_main.json", + "metadata_uuid": "" +} diff --git a/aodn_cloud_optimised/config/dataset/srs_oc_ljco_wqm_daily.json b/aodn_cloud_optimised/config/dataset/srs_oc_ljco_wqm_daily.json index 7234f1e..772147d 100644 --- a/aodn_cloud_optimised/config/dataset/srs_oc_ljco_wqm_daily.json +++ b/aodn_cloud_optimised/config/dataset/srs_oc_ljco_wqm_daily.json @@ -2,6 +2,16 @@ "dataset_name": "srs_oc_ljco_wqm_daily", "logger_name": "srs_oc_ljco_wqm_daily", "cloud_optimised_format": "parquet", + "cluster_options" : { + "n_workers": [8, 20], + "scheduler_vm_types": "t3.small", + "worker_vm_types": "t3.large", + "allow_ingress_from": "me", + "compute_purchase_option": "spot_with_fallback", + "worker_options": { + "nthreads": 8, + "memory_limit": "32GB" } + 
}, "metadata_uuid": "e4ac6bf81-cd37-4611-8da8-4d5ae5e2bda", "gattrs_to_variables": [ "site_code" diff --git a/aodn_cloud_optimised/config/schema_validation_parquet.json b/aodn_cloud_optimised/config/schema_validation_parquet.json index 9acb184..1b7b5d4 100644 --- a/aodn_cloud_optimised/config/schema_validation_parquet.json +++ b/aodn_cloud_optimised/config/schema_validation_parquet.json @@ -1,108 +1,141 @@ { + "type": "object", + "properties": { + "dataset_name": {"type": "string"}, + "logger_name": {"type": "string"}, + "cloud_optimised_format": {"type": "string"}, + "cluster_options": { "type": "object", "properties": { - "dataset_name": {"type": "string"}, - "logger_name": {"type": "string"}, - "metadata_uuid": {"type": "string"}, - "gattrs_to_variables": {"type": "array", "items": {"type": "string"}}, - "partition_keys": {"type": "array", "items": {"type": "string"}}, - "time_extent": { - "type": "object", - "properties": { - "time": {"type": "string"}, - "partition_timestamp_period": {"type": "string"} - } - }, - "spatial_extent": { - "type": "object", - "properties": { - "lat": {"type": "string"}, - "lon": {"type": "string"}, - "spatial_resolution": {"type": "integer"} - } - }, - "schema": { - "type": "object", - "properties": { - "timestamp": {"type": "object"}, - "polygon": {"type": "object"}, - "filename": {"type": "object"} - } + "n_workers": { + "type": "array", + "items": {"type": "integer"}, + "minItems": 2, + "maxItems": 2 }, - "dataset_gattrs": { + "scheduler_vm_types": {"type": "string"}, + "worker_vm_types": {"type": "string"}, + "allow_ingress_from": {"type": "string"}, + "compute_purchase_option": {"type": "string"}, + "worker_options": { "type": "object", "properties": { - "title": {"type": "string"} + "nthreads": {"type": "integer"}, + "memory_limit": {"type": "string"} + }, + "required": ["nthreads", "memory_limit"] + } + }, + "required": ["n_workers", "scheduler_vm_types", "worker_vm_types", "allow_ingress_from", "compute_purchase_option", 
"worker_options"] + }, + "metadata_uuid": {"type": "string"}, + "gattrs_to_variables": {"type": "array", "items": {"type": "string"}}, + "partition_keys": {"type": "array", "items": {"type": "string"}}, + "time_extent": { + "type": "object", + "properties": { + "time": {"type": "string"}, + "partition_timestamp_period": {"type": "string"} + }, + "required": ["time", "partition_timestamp_period"] + }, + "spatial_extent": { + "type": "object", + "properties": { + "lat": {"type": "string"}, + "lon": {"type": "string"}, + "spatial_resolution": {"type": "integer"} + }, + "required": ["lat", "lon", "spatial_resolution"] + }, + "schema": { + "type": "object", + "properties": { + "timestamp": {"type": "object"}, + "polygon": {"type": "object"}, + "filename": {"type": "object"} + }, + "required": ["timestamp", "polygon", "filename"] + }, + "dataset_gattrs": { + "type": "object", + "properties": { + "title": {"type": "string"} + }, + "required": ["title"] + }, + "force_old_pq_del": {"type": "boolean"}, + "aws_opendata_registry": { + "type": "object", + "properties": { + "Name": {"type": "string"}, + "Description": {"type": "string"}, + "Documentation": {"type": "string"}, + "Contact": {"type": "string"}, + "ManagedBy": {"type": "string"}, + "UpdateFrequency": {"type": "string"}, + "Tags": {"type": "array", "items": {"type": "string"}}, + "License": {"type": "string"}, + "Resources": { + "type": "array", + "items": { + "type": "object", + "properties": { + "Description": {"type": "string"}, + "ARN": {"type": "string"}, + "Region": {"type": "string"}, + "Type": {"type": "string"}, + "Explore": {"type": "array", "items": {"type": "string"}} + }, + "required": ["Description", "ARN", "Region", "Type"] } }, - "force_old_pq_del": {"type": "boolean"}, - "aws_opendata_registry": { + "DataAtWork": { "type": "object", "properties": { - "Name": {"type": "string"}, - "Description": {"type": "string"}, - "Documentation": {"type": "string"}, - "Contact": {"type": "string"}, - "ManagedBy": 
{"type": "string"}, - "UpdateFrequency": {"type": "string"}, - "Tags": {"type": "array", "items": {"type": "string"}}, - "License": {"type": "string"}, - "Resources": { + "Tutorials": { "type": "array", "items": { "type": "object", "properties": { - "Description": {"type": "string"}, - "ARN": {"type": "string"}, - "Region": {"type": "string"}, - "Type": {"type": "string"}, - "Explore": {"type": "array", "items": {"type": "string"}} - } + "Title": {"type": "string"}, + "URL": {"type": "string"}, + "Services": {"type": "string"}, + "AuthorName": {"type": "string"}, + "AuthorURL": {"type": "string"} + }, + "required": ["Title", "URL"] } }, - "DataAtWork": { - "type": "object", - "properties": { - "Tutorials": { - "type": "array", - "items": { - "type": "object", - "properties": { - "Title": {"type": "string"}, - "URL": {"type": "string"}, - "Services": {"type": "string"}, - "AuthorName": {"type": "string"}, - "AuthorURL": {"type": "string"} - } - } + "Tools & Applications": { + "type": "array", + "items": { + "type": "object", + "properties": { + "Title": {"type": "string"}, + "URL": {"type": "string"}, + "AuthorName": {"type": "string"}, + "AuthorURL": {"type": "string"} }, - "Tools & Applications": { - "type": "array", - "items": { - "type": "object", - "properties": { - "Title": {"type": "string"}, - "URL": {"type": "string"}, - "AuthorName": {"type": "string"}, - "AuthorURL": {"type": "string"} - } - } + "required": ["Title", "URL"] + } + }, + "Publications": { + "type": "array", + "items": { + "type": "object", + "properties": { + "Title": {"type": "string"}, + "URL": {"type": "string"}, + "AuthorName": {"type": "string"} }, - "Publications": { - "type": "array", - "items": { - "type": "object", - "properties": { - "Title": {"type": "string"}, - "URL": {"type": "string"}, - "AuthorName": {"type": "string"} - } - } - } + "required": ["Title", "URL"] } } } } - }, - "required": ["dataset_name", "cloud_optimised_format", "time_extent", "spatial_extent", 
"metadata_uuid", "schema"] + } + } + }, + "required": ["dataset_name", "cluster_options", "cloud_optimised_format", "time_extent", "spatial_extent", "metadata_uuid", "schema"] } diff --git a/aodn_cloud_optimised/config/schema_validation_zarr.json b/aodn_cloud_optimised/config/schema_validation_zarr.json index 7d133a8..96f23ab 100644 --- a/aodn_cloud_optimised/config/schema_validation_zarr.json +++ b/aodn_cloud_optimised/config/schema_validation_zarr.json @@ -3,6 +3,31 @@ "properties": { "dataset_name": {"type": "string"}, "logger_name": {"type": "string"}, + "cloud_optimised_format": {"type": "string"}, + "cluster_options": { + "type": "object", + "properties": { + "n_workers": { + "type": "array", + "items": {"type": "integer"}, + "minItems": 2, + "maxItems": 2 + }, + "scheduler_vm_types": {"type": "string"}, + "worker_vm_types": {"type": "string"}, + "allow_ingress_from": {"type": "string"}, + "compute_purchase_option": {"type": "string"}, + "worker_options": { + "type": "object", + "properties": { + "nthreads": {"type": "integer"}, + "memory_limit": {"type": "string"} + }, + "required": ["nthreads", "memory_limit"] + } + }, + "required": ["n_workers", "scheduler_vm_types", "worker_vm_types", "allow_ingress_from", "compute_purchase_option", "worker_options"] + }, "metadata_uuid": {"type": "string"}, "dimensions": { "type": "object", @@ -114,5 +139,5 @@ } } }, - "required": ["dataset_name", "cloud_optimised_format", "dimensions","var_template_shape","vars_to_drop_no_common_dimension", "schema"] + "required": ["dataset_name", "cluster_options", "cloud_optimised_format", "dimensions","var_template_shape","vars_to_drop_no_common_dimension", "schema"] } diff --git a/aodn_cloud_optimised/lib/AnmnHourlyTsHandler.py b/aodn_cloud_optimised/lib/AnmnHourlyTsHandler.py index 364e4f0..11488ef 100644 --- a/aodn_cloud_optimised/lib/AnmnHourlyTsHandler.py +++ b/aodn_cloud_optimised/lib/AnmnHourlyTsHandler.py @@ -9,36 +9,50 @@ class AnmnHourlyTsHandler(GenericHandler): def 
__init__(self, **kwargs): super().__init__(**kwargs) - # TODO: rename JULD variable to TIME? or just copy it so that it's more consistent with other dataset? def preprocess_data( self, netcdf_fp ) -> Generator[Tuple[pd.DataFrame, xr.Dataset], None, None]: - if self.is_valid_netcdf(netcdf_fp): - # Use open_dataset as a context manager to ensure proper handling of the dataset - with xr.open_dataset(netcdf_fp) as ds: - # Convert xarray to pandas DataFrame - assert set(ds.dims) == { - "OBSERVATION", - "INSTRUMENT", - }, f"Unexpected dimensions {ds.dims.keys()}" - - df = ds.drop_dims("INSTRUMENT").to_dataframe() - instrument_info = ds.drop_dims("OBSERVATION").to_dataframe() - - assert df.shape[1] + instrument_info.shape[1] == len( - ds.variables - ), "Some variable depends on both dimensions" - - df = df.join(instrument_info, on="instrument_index") - - assert df.shape[1] == len( - ds.variables - ), "Something went wrong with the join" - - # Decode strings from bytes - for col, dtype in df.dtypes.items(): - if dtype == object: - df[col] = df[col].astype(str) - - yield df, ds + """ + Preprocess a NetCDF file containing Mooring Hourly timeseries product data. + + This method reads a NetCDF file, typically used for Mooring Hourly timeseries products, + and processes it to yield a tuple of a pandas DataFrame and an xarray Dataset. + + The DataFrame contains the profile data with instrument information merged based on + the 'instrument_index'. This method ensures proper handling of the dataset using + a context manager and checks for expected dimensions and variables. + + :param netcdf_fp: Path to the input NetCDF file, or an open S3 file object (using s3fs). + :return: Generator yielding tuples of (DataFrame, Dataset) where DataFrame contains + the profile data with instrument information, and Dataset is the corresponding + xarray Dataset. 
+ """ + + # Use open_dataset as a context manager to ensure proper handling of the dataset + with xr.open_dataset(netcdf_fp, engine="h5netcdf") as ds: + # Convert xarray to pandas DataFrame + assert set(ds.dims) == { + "OBSERVATION", + "INSTRUMENT", + }, f"Unexpected dimensions {ds.dims.keys()}" + + df = ds.drop_dims("INSTRUMENT").to_dataframe() + instrument_info = ds.drop_dims("OBSERVATION").to_dataframe() + + assert df.shape[1] + instrument_info.shape[1] == len( + ds.variables + ), "Some variable depends on both dimensions" + + df = df.join(instrument_info, on="instrument_index") + + assert df.shape[1] == len( + ds.variables + ), "Something went wrong with the join" + + # Decode strings from bytes + for col, dtype in df.dtypes.items(): + if dtype == object: + df[col] = df[col].astype(str) + + yield df, ds diff --git a/aodn_cloud_optimised/lib/ArgoHandler.py b/aodn_cloud_optimised/lib/ArgoHandler.py index 96a3eba..e898c73 100755 --- a/aodn_cloud_optimised/lib/ArgoHandler.py +++ b/aodn_cloud_optimised/lib/ArgoHandler.py @@ -16,11 +16,18 @@ def preprocess_data( self, netcdf_fp ) -> Generator[Tuple[pd.DataFrame, xr.Dataset], None, None]: """ - Read a profile *_prof.nc which is an aggregation of multiple profiles files and returns a dataframe - :param netcdf_fp: input NetCDF filepath of an argo *_prof.nc file - :return: dataframe containing profile data + Preprocess a NetCDF file containing aggregated profile data. + + This method reads a profile NetCDF file (typically named with a *_prof.nc suffix), + which is an aggregation of multiple profile files, and returns a generator + yielding a tuple of a pandas DataFrame and an xarray Dataset. + + :param netcdf_fp: Path to the input NetCDF file, or an open S3 file object (using s3fs) of an Argo *_prof.nc file. + :return: Generator yielding tuples of (DataFrame, Dataset) where DataFrame contains the profile data + and Dataset is the corresponding xarray Dataset. 
""" - if not self.input_object_key.endswith("_prof.nc"): + + if not netcdf_fp.path.endswith("_prof.nc"): raise ValueError with xr.open_dataset(netcdf_fp) as ds: diff --git a/aodn_cloud_optimised/lib/CommonHandler.py b/aodn_cloud_optimised/lib/CommonHandler.py index 2bb6f74..0313ace 100644 --- a/aodn_cloud_optimised/lib/CommonHandler.py +++ b/aodn_cloud_optimised/lib/CommonHandler.py @@ -1,12 +1,15 @@ +import importlib import os -import tempfile import timeit from typing import List import boto3 -import netCDF4 +import s3fs import xarray as xr import yaml +from coiled import Cluster +from dask.distributed import Client +from dask.distributed import LocalCluster from jsonschema import validate, ValidationError from .config import load_variable_from_config, load_dataset_config @@ -20,19 +23,37 @@ def __init__(self, **kwargs): Args: **kwargs: Additional keyword arguments. - raw_bucket_name (str, optional[config]): Name of the raw bucket. - optimised_bucket_name (str, optional[config]): Name of the optimised bucket. - root_prefix_cloud_optimised_path (str, optional[config]): Root Prefix path of the location of cloud optimised files - input_object_key (str): Key of the input object. - force_old_pq_del (bool, optional[config]): Force the deletion of existing cloud optimised files(slow) (default=False) + optimised_bucket_name (str, optional): Name of the optimised bucket. Defaults to the value in the configuration. + root_prefix_cloud_optimised_path (str, optional): Root prefix path of the location of cloud optimised files. Defaults to the value in the configuration. + force_previous_parquet_deletion (bool, optional): Force the deletion of existing cloud optimised files (slow). Defaults to False. + cluster_mode (str, optional): Specifies the type of cluster to create ("remote", "local", or None). Defaults to "local". + dataset_config (dict): Configuration dictionary for the dataset. + clear_existing_data (bool, optional): Flag to clear existing data. Defaults to None. 
+ + Attributes: + start_time (float): The start time of the handler. + optimised_bucket_name (str): Name of the optimised bucket. + root_prefix_cloud_optimised_path (str): Root prefix path of the location of cloud optimised files. + cluster_mode (str): Specifies the type of cluster to create ("remote", "local", or None). + dataset_config (dict): Configuration dictionary for the dataset. + cloud_optimised_format (str): Format for cloud optimised files. + dataset_name (str): Name of the dataset. + schema (dict): Schema of the dataset. + logger (logging.Logger): Logger for logging information, warnings, and errors. + cloud_optimised_output_path (str): S3 path for cloud optimised output. + clear_existing_data (bool): Flag to clear existing data. + cluster_options (dict): Options for the cluster configuration. + s3_fs (s3fs.S3FileSystem): S3 file system object for accessing S3. + Raises: + ValueError: If an invalid cluster_mode is specified. """ self.start_time = timeit.default_timer() - self.temp_dir = tempfile.TemporaryDirectory() - self.raw_bucket_name = kwargs.get( - "raw_bucket_name", load_variable_from_config("BUCKET_RAW_DEFAULT") - ) + # TODO: remove this variable, not used anymore. + # self.raw_bucket_name = kwargs.get( + # "raw_bucket_name", load_variable_from_config("BUCKET_RAW_DEFAULT") + # ) self.optimised_bucket_name = kwargs.get( "optimised_bucket_name", load_variable_from_config("BUCKET_OPTIMISED_DEFAULT"), @@ -42,7 +63,14 @@ def __init__(self, **kwargs): load_variable_from_config("ROOT_PREFIX_CLOUD_OPTIMISED_PATH"), ) - self.input_object_key = kwargs.get("input_object_key", None) + # Cluster options + valid_clusters = ["remote", "local", None] + self.cluster_mode = kwargs.get("cluster_mode", "local") + + if self.cluster_mode not in valid_clusters: + raise ValueError( + f"Invalid cluster value: {self.cluster_mode}. 
Valid values are {valid_clusters}" + ) self.dataset_config = kwargs.get("dataset_config") @@ -58,12 +86,157 @@ def __init__(self, **kwargs): cloud_optimised_format = self.dataset_config.get("cloud_optimised_format") self.cloud_optimised_output_path = f"s3://{os.path.join(self.optimised_bucket_name, self.root_prefix_cloud_optimised_path, self.dataset_name + '.' + cloud_optimised_format)}/" - if self.input_object_key is not None: - self.filename = os.path.basename(self.input_object_key) - self.tmp_input_file = self.get_s3_raw_obj() - else: - self.logger.error("No input object given") - raise ValueError + self.clear_existing_data = kwargs.get( + "clear_existing_data", None + ) # setting to True will recreate the zarr from scratch at every run! + + self.cluster_options = self.dataset_config.get("cluster_options", None) + + self.s3_fs = s3fs.S3FileSystem( + anon=False + ) # variable overwritten in unittest to use moto server + + def __enter__(self): + # Initialize resources if necessary + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + # Release any resources held by the handler_nc_anmn_file + self.close() + + def close(self): + # Release resources + for name in dir(): + if not name.startswith("_"): + # del globals()[name] + self.logger.info(f"{name} has not been deleted") + import gc + + gc.collect() + + def create_cluster(self): + """ + Create a Dask cluster based on the specified cluster_mode. + + This method creates a Dask cluster either remotely using the Coiled service or locally + depending on the value of the cluster_mode attribute. If remote cluster creation fails, + it falls back to creating a local cluster. + + Attributes: + cluster_mode (str): Specifies the type of cluster to create ("remote" or "local"). + logger (logging.Logger): Logger for logging information, warnings, and errors. + dataset_config (dict): Configuration dictionary containing cluster options. + dataset_name (str): Name of the dataset used for naming the remote cluster. 
+ cluster (Cluster): The created Dask cluster (either remote or local). + client (Client): Dask client connected to the created cluster. + + Raises: + ValueError: If an invalid cluster_mode is specified. + + Returns: + Tuple[Client, Cluster]: A tuple containing the Dask client and the created cluster. + + Notes: + - If self.client and self.cluster become instance attributes, they can't be used with + self.client.submit as they can't be serialised. + + """ + + # TODO: quite crazy, but if client and cluster become self.client and self.cluster, then they can't be used + # with self.client.submit as they can't be serialize ... what a bloody pain in .. seriously + + local_cluster_options = self.dataset_config.get( + "local_cluster_options", + { + "n_workers": 2, + "memory_limit": "8GB", + "threads_per_worker": 2, + }, + ) + + if self.cluster_mode == "remote": + try: + self.logger.info("Creating a remote cluster") + cluster_options = self.dataset_config.get("cluster_options", None) + if cluster_options is None: + self.logger.error("No cluster options provided in dataset_config") + + cluster_options["name"] = f"Processing_{self.dataset_name}" + + cluster = Cluster(**cluster_options) + client = Client(cluster) + self.logger.info( + f"Coiled Cluster dask dashboard available at {cluster.dashboard_link}" + ) + + except Exception as e: + self.logger.warning( + f"Could not create a Coiled cluster: {e}. Falling back to local cluster." 
+ ) + # Create a local Dask cluster as a fallback + cluster = LocalCluster(**local_cluster_options) + client = Client(cluster) + self.logger.info( + f"Local Cluster dask dashboard available at {cluster.dashboard_link}" + ) + elif self.cluster_mode == "local": + self.logger.info("Creating a local cluster") + + cluster = LocalCluster(**local_cluster_options) + client = Client(cluster) + self.logger.info( + f"Local Cluster dask dashboard available at {cluster.dashboard_link}" + ) + + return client, cluster + + def close_cluster(self, client, cluster): + """ + Close the Dask cluster and client. + + This method attempts to close the Dask client and cluster if they are currently open. + It logs successful closure operations and catches any exceptions that occur during + the process, logging them as errors. + + Attributes: + client (Client): The Dask client connected to the cluster. + cluster (Cluster): The Dask cluster (either remote or local). + logger (logging.Logger): Logger for logging information and errors. + + Logs: + Info: Logs a message when the Dask client and cluster are closed successfully. + Error: Logs a message if there is an error while closing the Dask client or cluster. + """ + try: + client.close() + self.logger.info("Dask client closed successfully.") + + cluster.close() + self.logger.info("Dask cluster closed successfully.") + except Exception as e: + self.logger.error(f"Error while closing the cluster or client: {e}") + + @staticmethod + def batch_process_fileset(fileset, batch_size=10): + """ + Processes a list of files in batches. + + This method yields successive batches of files from the input fileset. + Each batch contains up to `batch_size` files. Adjusting `batch_size` + can impact memory usage and performance, potentially leading to out-of-memory errors. Be cautious. + + Args: + fileset (list): A list of files to be processed in batches. + batch_size (int, optional): The number of files to include in each batch (default is 10). 
+ + Yields: + list: A sublist of `fileset` containing up to `batch_size` files. + """ + # batch_size modification could lead to some out of mem + num_files = len(fileset) + for start_idx in range(0, num_files, batch_size): + end_idx = min(start_idx + batch_size, num_files) + yield fileset[start_idx:end_idx] def validate_json(self, json_validation_path): """ @@ -100,55 +273,37 @@ def validate_json(self, json_validation_path): schema = load_dataset_config(json_validation_path) try: validate(instance=self.dataset_config, schema=schema) - self.logger.info("JSON configuration for dataset: Validation successful.") + self.logger.info( + f"JSON configuration for dataset {os.path.basename(json_validation_path)}: Validation successful." + ) except ValidationError as e: - raise ValueError(f"JSON configuration for dataset: Validation failed: {e}") - - def is_valid_netcdf(self, nc_file_path): - """ - Check if a file is a valid NetCDF file. - - Parameters: - - file_path (str): The path to the NetCDF file. - - Returns: - - bool: True if the file is a valid NetCDF file, False otherwise. - """ - if not self.input_object_key.endswith(".nc"): - self.logger.error( - f"{self.filename}: Not valid NetCDF file. Not ending with .nc" + raise ValueError( + f"JSON configuration for dataset {os.path.basename(json_validation_path)}: Validation failed: {e}" ) - raise ValueError - - try: - netCDF4.Dataset(nc_file_path) - return True - except Exception as e: - self.logger.error(f"{self.filename}: Not valid NetCDF file: {e}.") - raise TypeError - - def get_s3_raw_obj(self) -> str: - """ - Download an S3 object from the raw bucket to a temporary file. - - :return: Local filepath of the temporary file. 
- :rtype: str - """ - - s3 = boto3.client("s3") - # Construct the full path for the temporary file - temp_file_path = os.path.join( - self.temp_dir.name, os.path.basename(self.input_object_key) - ) - - # Download the S3 object to the temporary file - s3.download_file(self.raw_bucket_name, self.input_object_key, temp_file_path) - - self.logger.info( - f"{self.filename}: Downloading {self.input_object_key} object from {self.raw_bucket_name} bucket" - ) - return temp_file_path + # TODO: remove as not used anymore + # def is_valid_netcdf(self, nc_file_path): + # """ + # Check if a file is a valid NetCDF file. + # + # Parameters: + # - file_path (str): The path to the NetCDF file. + # + # Returns: + # - bool: True if the file is a valid NetCDF file, False otherwise. + # """ + # if not self.input_object_key.endswith(".nc"): + # self.logger.error( + # f"{self.filename}: Not valid NetCDF file. Not ending with .nc" + # ) + # raise ValueError + # + # try: + # netCDF4.Dataset(nc_file_path) + # return True + # except Exception as e: + # self.logger.error(f"{self.filename}: Not valid NetCDF file: {e}.") + # raise TypeError @staticmethod def is_open_ds(ds: xr.Dataset) -> bool: @@ -168,39 +323,40 @@ def is_open_ds(ds: xr.Dataset) -> bool: except RuntimeError: return False # If a RuntimeError is raised, the Dataset is closed - def push_metadata_aws_registry(self) -> None: - """ - Pushes metadata to the AWS OpenData Registry. - - If the 'aws_opendata_registry' key is missing from the dataset configuration, a warning is logged. - Otherwise, the metadata is extracted from the 'aws_opendata_registry' key, converted to YAML format, - and uploaded to the specified S3 bucket. 
- - Returns: - None - """ - if "aws_opendata_registry" not in self.dataset_config: - self.logger.warning( - "Missing dataset configuration to populate AWS OpenData Registry" - ) - else: - aws_registry_config = self.dataset_config["aws_opendata_registry"] - yaml_data = yaml.dump(aws_registry_config) - - s3 = boto3.client("s3") - - key = os.path.join( - self.root_prefix_cloud_optimised_path, self.dataset_name + ".yaml" - ) - # Upload the YAML data to S3 - s3.put_object( - Bucket=self.optimised_bucket_name, - Key=key, - Body=yaml_data.encode("utf-8"), - ) - self.logger.info( - f"Push AWS Registry file to: {os.path.join(self.root_prefix_cloud_optimised_path, self.dataset_name + '.yaml')}" - ) + # TODO: this is not the way aws registry files are created. To remove/modify + # def push_metadata_aws_registry(self) -> None: + # """ + # Pushes metadata to the AWS OpenData Registry. + # + # If the 'aws_opendata_registry' key is missing from the dataset configuration, a warning is logged. + # Otherwise, the metadata is extracted from the 'aws_opendata_registry' key, converted to YAML format, + # and uploaded to the specified S3 bucket. 
+ # + # Returns: + # None + # """ + # if "aws_opendata_registry" not in self.dataset_config: + # self.logger.warning( + # "Missing dataset configuration to populate AWS OpenData Registry" + # ) + # else: + # aws_registry_config = self.dataset_config["aws_opendata_registry"] + # yaml_data = yaml.dump(aws_registry_config) + # + # s3 = boto3.client("s3") + # + # key = os.path.join( + # self.root_prefix_cloud_optimised_path, self.dataset_name + ".yaml" + # ) + # # Upload the YAML data to S3 + # s3.put_object( + # Bucket=self.optimised_bucket_name, + # Key=key, + # Body=yaml_data.encode("utf-8"), + # ) + # self.logger.info( + # f"Push AWS Registry file to: {os.path.join(self.root_prefix_cloud_optimised_path, self.dataset_name + '.yaml')}" + # ) def postprocess(self, ds: xr.Dataset) -> None: """ @@ -215,15 +371,25 @@ def postprocess(self, ds: xr.Dataset) -> None: if self.is_open_ds(ds): ds.close() - if os.path.exists(self.tmp_input_file): - os.remove(self.tmp_input_file) - if os.path.exists(self.temp_dir.name): - self.temp_dir.cleanup() - self.logger.handlers.clear() def _get_generic_handler_class(dataset_config): + """ + Determine the appropriate handler_nc_anmn_file class based on the dataset configuration. + + Args: + dataset_config (dict): A dictionary containing the configuration of the dataset. The key + "cloud_optimised_format" should be set to either "zarr" or "parquet" + to specify the format. + + Returns: + class: The handler_nc_anmn_file class corresponding to the specified cloud-optimized format. + + Raises: + ValueError: If the "cloud_optimised_format" is not specified or is neither "zarr" + nor "parquet". 
+ """ from .GenericParquetHandler import GenericHandler as parquet_handler from .GenericZarrHandler import GenericHandler as zarr_handler @@ -239,82 +405,40 @@ def _get_generic_handler_class(dataset_config): return handler_class -def cloud_optimised_creation(obj_key: str, dataset_config, **kwargs) -> None: - """ - Create Cloud Optimised files for a specific object key in an S3 bucket. - - Args: - obj_key (str): The object key (file path) of the NetCDF file to process. - dataset_config (dictionary): dataset configuration. Check config/dataset_template.json for example - **kwargs: Additional keyword arguments for customization. - handler_class (class, optional): Handler class for cloud optimised creation (default is GenericHandler). - force_old_pq_del (bool, optional): Whether to force deletion of old Parquet files (default is False). - - Returns: - None - """ - handler_class = kwargs.get("handler_class", None) - - # loading the right handler based on configuration - if handler_class is None: - handler_class = _get_generic_handler_class(dataset_config) - - handler_reprocess_arg = kwargs.get("handler_reprocess_arg", None) - - kwargs_handler_class = { - "raw_bucket_name": kwargs.get( - "raw_bucket_name", load_variable_from_config("BUCKET_RAW_DEFAULT") - ), - "optimised_bucket_name": kwargs.get( - "optimised_bucket_name", - load_variable_from_config("BUCKET_OPTIMISED_DEFAULT"), - ), - "root_prefix_cloud_optimised_path": kwargs.get( - "root_prefix_cloud_optimised_path", - load_variable_from_config("ROOT_PREFIX_CLOUD_OPTIMISED_PATH"), - ), - "input_object_key": obj_key, - "dataset_config": dataset_config, - "reprocess": handler_reprocess_arg, - } - - # Creating an instance of the specified class with the provided arguments - handler_instance = handler_class(**kwargs_handler_class) - - handler_instance.to_cloud_optimised() - - -def cloud_optimised_creation_loop( - obj_ls: List[str], dataset_config: dict, **kwargs +def cloud_optimised_creation( + s3_file_uri_list: 
List[str], dataset_config: dict, **kwargs ) -> None: """ - Iterate through a list of file paths and create Cloud Optimised files for each file. + Iterate through a list of s3 file paths and create Cloud Optimised files for each file. Args: - obj_ls (List[str]): List of file paths to process. + s3_file_uri_list (List[str]): List of file paths to process. dataset_config (dictionary): dataset configuration. Check config/dataset_template.json for example **kwargs: Additional keyword arguments for customization. handler_class (class, optional): Handler class for cloud optimised creation. - force_old_pq_del (bool, optional): Whether to force deletion of old Parquet files (default is False). + force_previous_parquet_deletion (bool, optional): Whether to force deletion of old Parquet files (default is False). Returns: None """ - handler_class = kwargs.get("handler_class", None) + # this is optional! Default will use generic handler + handler_class_name = dataset_config.get("handler_class", None) # loading the right handler based on configuration - if handler_class is None: + if handler_class_name is None: handler_class = _get_generic_handler_class(dataset_config) + else: + module = importlib.import_module( + f"aodn_cloud_optimised.lib.{handler_class_name}" + ) + handler_class = getattr(module, handler_class_name) - handler_reprocess_arg = kwargs.get("reprocess", None) + handler_clear_existing_data_arg = kwargs.get("clear_existing_data", None) # Create the kwargs_handler_class dictionary, to be used as list of arguments to call cloud_optimised_creation -> handler_class # when values need to be overwritten kwargs_handler_class = { - "raw_bucket_name": kwargs.get( - "raw_bucket_name", load_variable_from_config("BUCKET_RAW_DEFAULT") - ), "optimised_bucket_name": kwargs.get( "optimised_bucket_name", load_variable_from_config("BUCKET_OPTIMISED_DEFAULT"), @@ -323,38 +447,80 @@ def cloud_optimised_creation_loop( "root_prefix_cloud_optimised_path", 
load_variable_from_config("ROOT_PREFIX_CLOUD_OPTIMISED_PATH"), ), + "cluster_mode": kwargs.get("cluster_mode", "local"), } # Filter out None values filtered_kwargs = {k: v for k, v in kwargs_handler_class.items() if v is not None} - + kwargs_handler_class = filtered_kwargs logger_name = dataset_config.get("logger_name", "generic") logger = get_logger(logger_name) - start_whole_processing = timeit.default_timer() - i = 1 - for f in obj_ls: - - logger.info(f"{f}: start processing") - - start_time = timeit.default_timer() - try: - cloud_optimised_creation( - f, - dataset_config, - handler_class=handler_class, - handler_reprocess_arg=handler_reprocess_arg, - **filtered_kwargs, - ) - time_spent = timeit.default_timer() - start_time - - logger.info( - f"{i}/{len(obj_ls)}: {f} Cloud Optimised file completed in {time_spent}s" - ) - except Exception as e: - logger.error(f"{i}/{len(obj_ls)} issue with {f}: {e}") + kwargs_handler_class["dataset_config"] = dataset_config + kwargs_handler_class["clear_existing_data"] = handler_clear_existing_data_arg - i += 1 + # Creating an instance of the specified class with the provided arguments + start_whole_processing = timeit.default_timer() + with handler_class(**kwargs_handler_class) as handler_instance: + handler_instance.to_cloud_optimised(s3_file_uri_list) time_spent_processing = timeit.default_timer() - start_whole_processing logger.info(f"Whole dataset completed in {time_spent_processing}s") + + # TODO: everything seems very slow using to_cloud_optimised. Maybe let's try to use to_cloud_optimised_single below? + # and comment above or do something. 
Will comment for now + # + # if dataset_config.get("cloud_optimised_format") == "parquet": + # def task(f, i, handler_clear_existing_data_arg=False): + # start_time = timeit.default_timer() + # try: + # # kwargs_handler_class["input_object_key"] = f + # kwargs_handler_class["dataset_config"] = dataset_config + # kwargs_handler_class[ + # "clear_existing_data" + # ] = handler_clear_existing_data_arg + # + # # Creating an instance of the specified class with the provided arguments + # with handler_class(**kwargs_handler_class) as handler_instance: + # handler_instance.to_cloud_optimised_single(f) + # + # time_spent = timeit.default_timer() - start_time + # logger.info( + # f"{i}/{len(s3_file_uri_list)}: {f} Cloud Optimised file completed in {time_spent}s" + # ) + # + # except Exception as e: + # logger.error(f"{i}/{len(s3_file_uri_list)} issue with {f}: {e}") + # + # local_cluster_options = { + # "n_workers": 2, + # "memory_limit": "8GB", + # "threads_per_worker": 2, + # } + # + # cluster = LocalCluster(**local_cluster_options) + # client = Client(cluster) + # + # client.amm.start() # Start Active Memory Manager + # logger.info( + # f"Local Cluster dask dashboard available at {cluster.dashboard_link}" + # ) + # + # if handler_clear_existing_data_arg: + # # if handler_clear_existing_data_arg, better to wait for this task to complete before adding new data!! 
+ # futures_init = [ + # client.submit(task, s3_file_uri_list[0], 1, handler_clear_existing_data_arg=True) + # ] + # wait(futures_init) + # + # # Parallel Execution with List Comprehension + # futures = [ + # client.submit(task, f, i) for i, f in enumerate(s3_file_uri_list[1:], start=2) + # ] + # wait(futures) + # else: + # futures = [client.submit(task, f, i) for i, f in enumerate(s3_file_uri_list, start=1)] + # wait(futures) + # + # client.close() + # cluster.close() diff --git a/aodn_cloud_optimised/lib/GenericParquetHandler.py b/aodn_cloud_optimised/lib/GenericParquetHandler.py index 9535d09..710b5f8 100755 --- a/aodn_cloud_optimised/lib/GenericParquetHandler.py +++ b/aodn_cloud_optimised/lib/GenericParquetHandler.py @@ -1,9 +1,8 @@ import importlib.resources -import json import os import re import timeit -from typing import List, Tuple, Generator +from typing import Tuple, Generator import boto3 import numpy as np @@ -12,12 +11,22 @@ import pyarrow.parquet as pq import traceback import xarray as xr -import yaml from shapely.geometry import Point, Polygon from .schema import create_pyarrow_schema, generate_json_schema_var_from_netcdf - +from aodn_cloud_optimised.lib.s3Tools import ( + delete_objects_in_prefix, + split_s3_path, + prefix_exists, + create_fileset, +) + +from aodn_cloud_optimised.lib.logging import get_logger from .CommonHandler import CommonHandler +from dask.distributed import wait + + +# TODO: improve log for parallism by adding a uuid for each task class GenericHandler(CommonHandler): @@ -27,17 +36,19 @@ def __init__(self, **kwargs): Args: **kwargs: Additional keyword arguments. - raw_bucket_name (str, optional[config]): Name of the raw bucket. optimised_bucket_name (str, optional[config]): Name of the optimised bucket. root_prefix_cloud_optimised_path (str, optional[config]): Root Prefix path of the location of cloud optimised files - input_object_key (str): Key of the input object. 
- force_old_pq_del (bool, optional[config]): Force the deletion of existing cloud optimised files(slow) (default=False) + force_previous_parquet_deletion (bool, optional[config]): Force the deletion of existing cloud optimised files(slow) (default=False) + + Inherits: + CommonHandler: Provides common functionality for handling cloud-optimised datasets. """ super().__init__(**kwargs) self.delete_pq_unmatch_enable = kwargs.get( - "force_old_pq_del", self.dataset_config.get("force_old_pq_del", False) + "force_previous_parquet_deletion", + self.dataset_config.get("force_previous_parquet_deletion", False), ) json_validation_path = str( @@ -64,36 +75,35 @@ def preprocess_data_csv( Preprocesses a CSV file using pandas and converts it into an xarray Dataset based on dataset configuration. Args: - csv_fp (str): File path to the CSV file to be processed. + csv_fp (str or s3fs.core.S3File): File path or s3fs object of the CSV file to be processed. Yields: Tuple[pd.DataFrame, xr.Dataset]: A generator yielding a tuple containing the processed pandas DataFrame and its corresponding xarray Dataset. - This method reads a CSV file, csv_fp using pandas read_csv function with configuration options - specified in the dataset configuration (stored in 'pandas_read_csv_config' key of self.dataset_config, expected - to be a JSON-like dictionary). The resulting DataFrame is then converted into an xarray Dataset using - xr.Dataset.from_dataframe(). - - i.e.: - "pandas_read_csv_config": { - "delimiter": ";", - "header": 0, - "index_col": "detection_timestamp", - "parse_dates": ["detection_timestamp"], - "na_values": ["N/A", "NaN"], - "encoding": "utf-8" - }, - - See pandas.read_csv Documentation for more options + This method reads a CSV file (`csv_fp`) using pandas' `read_csv` function with configuration options + specified in the dataset configuration (`pandas_read_csv_config` key of `self.dataset_config`, expected + to be a JSON-like dictionary). 
The resulting DataFrame (`df`) is then converted into an xarray Dataset using + `xr.Dataset.from_dataframe()`. + + Example of `pandas_read_csv_config` in dataset configuration: + ```json + "pandas_read_csv_config": { + "delimiter": ";", + "header": 0, + "index_col": "detection_timestamp", + "parse_dates": ["detection_timestamp"], + "na_values": ["N/A", "NaN"], + "encoding": "utf-8" + } + ``` The method also uses the 'schema' from the dataset configuration to assign attributes to variables in the xarray Dataset. Each variable's attributes are extracted from the 'schema' and assigned to the Dataset variable's - attributes. The 'type' attribute from the pyarrow_schema is removed from the Dataset variables' attributes since it + attributes. The 'type' attribute from the `pyarrow_schema` is removed from the Dataset variables' attributes since it is considered unnecessary. If a variable in the Dataset is not found in the schema, an error is logged. - """ if "pandas_read_csv_config" in self.dataset_config: config_from_json = self.dataset_config["pandas_read_csv_config"] @@ -126,31 +136,56 @@ def preprocess_data_netcdf( the GenericHandler class with super() for method delegation. Args: - netcdf_fp (str): Input NetCDF filepath. + netcdf_fp (str or s3fs.core.S3File): Input NetCDF filepath or s3fs object. Yields: tuple: A tuple containing DataFrame and Dataset. 
- """ - if self.is_valid_netcdf(netcdf_fp): - # Use open_dataset as a context manager to ensure proper handling of the dataset - with xr.open_dataset(netcdf_fp) as ds: - # Convert xarray to pandas DataFrame - df = ds.to_dataframe() - # TODO: call check function on variable attributes - if self.check_var_attributes(ds): - yield df, ds - else: - self.logger.error( - "NetCDF file is not consistent with the pre-defined schema" - ) + This method reads a NetCDF file (`netcdf_fp`) using xarray's `open_dataset` function with configuration options + specified in the dataset configuration (`netcdf_read_config` key of `self.dataset_config`, expected + to be a JSON-like dictionary). The resulting Dataset (`ds`) is converted into a pandas DataFrame (`df`) using + `ds.to_dataframe()`. + + The method also verifies variable attributes against the 'schema' from the dataset configuration. + If the attributes do not match the schema, an error is logged. + + Example of `netcdf_read_config` in dataset configuration: + ```json + "netcdf_read_config": { + "engine": "h5netcdf", + "decode_times": False + } + ``` + """ + with xr.open_dataset(netcdf_fp, engine="h5netcdf") as ds: + # Convert xarray to pandas DataFrame + df = ds.to_dataframe() + # TODO: call check function on variable attributes + if self.check_var_attributes(ds): + yield df, ds + else: + self.logger.error( + "NetCDF file is not consistent with the pre-defined schema" + ) def preprocess_data( self, fp ) -> Generator[Tuple[pd.DataFrame, xr.Dataset], None, None]: - if fp.endswith(".nc"): + """ + Overwrites the preprocess_data method from CommonHandler. + + Args: + fp (str or s3fs.core.S3File): File path or S3 file object. + + Yields: + tuple: A tuple containing DataFrame and Dataset. + + If `fp` ends with ".nc", it delegates to `self.preprocess_data_netcdf(fp)`. + If `fp` ends with ".csv", it delegates to `self.preprocess_data_csv(fp)`. 
+ """ + if fp.path.endswith(".nc"): return self.preprocess_data_netcdf(fp) - if fp.endswith(".csv"): + if fp.path.endswith(".csv"): return self.preprocess_data_csv(fp) @staticmethod @@ -329,7 +364,7 @@ def _add_timestamp_df(self, df: pd.DataFrame) -> pd.DataFrame: return df - def _add_columns_df(self, df: pd.DataFrame, ds: xr.Dataset) -> pd.DataFrame: + def _add_columns_df(self, df: pd.DataFrame, ds: xr.Dataset, f) -> pd.DataFrame: """ Adds filename column to the DataFrame as well as variables defined in the json config. @@ -340,7 +375,6 @@ def _add_columns_df(self, df: pd.DataFrame, ds: xr.Dataset) -> pd.DataFrame: Returns: pd.DataFrame: DataFrame with added columns. """ - gattrs_to_variables = self.dataset_config["gattrs_to_variables"] for attr in gattrs_to_variables: if attr in ds.attrs: @@ -350,11 +384,11 @@ def _add_columns_df(self, df: pd.DataFrame, ds: xr.Dataset) -> pd.DataFrame: f"{attr} global attribute doesn't exist in the original NetCDF. The corresponding variable won't be created" ) - df["filename"] = os.path.basename(self.input_object_key) + df["filename"] = os.path.basename(f.path) return df - def _rm_bad_timestamp_df(self, df: pd.DataFrame) -> pd.DataFrame: + def _rm_bad_timestamp_df(self, df: pd.DataFrame, f) -> pd.DataFrame: """ Remove rows with bad timestamps from the DataFrame. @@ -374,7 +408,7 @@ def _rm_bad_timestamp_df(self, df: pd.DataFrame) -> pd.DataFrame: if any(df["timestamp"] < 0): self.logger.warning( - f"{self.filename}: NaN values of {time_varname} time variable in dataset. Trimming data from NaN values" + f"{f.path}: NaN values of {time_varname} time variable in dataset. Trimming data from NaN values" ) df2 = df[df["timestamp"] > 0].copy() df = df2 @@ -489,14 +523,13 @@ def check_var_attributes(self, ds): return True def publish_cloud_optimised( - self, - df: pd.DataFrame, - ds: xr.Dataset, + self, df: pd.DataFrame, ds: xr.Dataset, s3_file_handle ) -> None: """ Create a parquet file containing data only. 
Args: + s3_file_handle: s3_file_handle df (pd.DataFrame): The pandas DataFrame containing the data. ds (Dataset): The dataset object. Returns: @@ -505,9 +538,8 @@ def publish_cloud_optimised( partition_keys = self.dataset_config["partition_keys"] df = self._add_timestamp_df(df) - df = self._add_columns_df(df, ds) - df = self._rm_bad_timestamp_df(df) - + df = self._add_columns_df(df, ds, s3_file_handle) + df = self._rm_bad_timestamp_df(df, s3_file_handle) if "polygon" in partition_keys: if not "spatial_extent" in self.dataset_config: self.logger.error("Missing spatial_extent config") @@ -515,6 +547,8 @@ def publish_cloud_optimised( else: df = self._add_polygon(df) + filename = os.path.basename(s3_file_handle.path) + # Needs to be specified here as df is here a pandas df, while later on, it is a pyarrow table. some renaming should happen if isinstance(df.index, pd.MultiIndex): df_var_list = df.columns.tolist() + [name for name in df.index.names] @@ -549,14 +583,14 @@ def publish_cloud_optimised( # df.cast fails complaining that the schemas are different while they're arent. different order is often the case pdf = self.cast_table_by_schema(pdf, subset_schema) except ValueError as e: - self.logger.error(f"{self.filename}: {type(e).__name__}") + self.logger.error(f"{filename}: {type(e).__name__}") # Part B: Create NaN arrays for missing columns in the pyarrow table by comparing the self.pyarrow_schema variable if self.pyarrow_schema is not None: for field in self.pyarrow_schema: if field.name not in df_var_list: self.logger.warning( - f"{self.filename}: {field.name} variable missing from dataset. creating a null array of {field.type}" + f"{filename}: {field.name} variable missing from input file. 
creating a null array of {field.type}" ) null_array = pa.nulls(len(pdf), field.type) pdf = pdf.append_column(field.name, null_array) @@ -567,12 +601,12 @@ def publish_cloud_optimised( for column_name in df_columns: if column_name not in pdf.schema.names: var_config = generate_json_schema_var_from_netcdf( - self.tmp_input_file, column_name + s3_file_handle, column_name, s3_fs=self.s3_fs ) # if df.index.name is not None and column_name in df.index.name: # self.logger.warning(f'missing variable from provided pyarrow_schema, please add {column_name} : {df.index.dtype}') # else: - # #TODO: improce this to return all the varatts as well + # #TODO: improve this to return all the varatts as well # var_config = generate_json_schema_var_from_netcdf(self.input_object_key, column_name) self.logger.warning( f"missing variable from provided pyarrow_schema config, please add to dataset config (respect double quotes): {var_config}" @@ -589,16 +623,17 @@ def publish_cloud_optimised( pq.write_to_dataset( pdf, root_path=self.cloud_optimised_output_path, + filesystem=self.s3_fs, existing_data_behavior="overwrite_or_ignore", row_group_size=20000, partition_cols=partition_keys, use_threads=True, metadata_collector=metadata_collector, - basename_template=os.path.basename(self.input_object_key) + basename_template=filename + "-{i}.parquet", # this is essential for the overwriting part ) self.logger.info( - f"{self.filename}: Parquet files successfully created in {self.cloud_optimised_output_path} \n" + f"{filename}: Parquet files successfully created in {self.cloud_optimised_output_path} \n" ) self._add_metadata_sidecar() @@ -738,47 +773,17 @@ def _add_metadata_sidecar(self) -> None: dataset_metadata_path = os.path.join( self.cloud_optimised_output_path, "_common_metadata" ) - pq.write_metadata(pdf_schema, dataset_metadata_path) + pq.write_metadata( + pdf_schema, + dataset_metadata_path, + filesystem=self.s3_fs, + ) self.logger.info( - f"{self.filename}: Parquet metadata file successfully 
created in {dataset_metadata_path} \n" + f"Parquet metadata file successfully created in {dataset_metadata_path} \n" ) - def push_metadata_aws_registry(self) -> None: - """ - Pushes metadata to the AWS OpenData Registry. - - If the 'aws_opendata_registry' key is missing from the dataset configuration, a warning is logged. - Otherwise, the metadata is extracted from the 'aws_opendata_registry' key, converted to YAML format, - and uploaded to the specified S3 bucket. - - Returns: - None - """ - if "aws_opendata_registry" not in self.dataset_config: - self.logger.warning( - "Missing dataset configuration to populate AWS OpenData Registry" - ) - else: - aws_registry_config = self.dataset_config["aws_opendata_registry"] - yaml_data = yaml.dump(aws_registry_config) - - s3 = boto3.client("s3") - - key = os.path.join( - self.root_prefix_cloud_optimised_path, self.dataset_name + ".yaml" - ) - # Upload the YAML data to S3 - s3.put_object( - Bucket=self.optimised_bucket_name, - Key=key, - Body=yaml_data.encode("utf-8"), - ) - self.logger.info( - f"Push AWS Registry file to: {os.path.join(self.cloud_optimised_output_path, self.dataset_name + '.yaml')}" - ) - - def delete_existing_matching_parquet(self) -> None: + def delete_existing_matching_parquet(self, filename) -> None: """ Delete unmatched Parquet files. 
@@ -802,15 +807,20 @@ def delete_existing_matching_parquet(self) -> None: # remote test on local machine shows 15 sec for 50k objects try: + # TODO: with moto and unittests, we get the following error: + # GetFileInfo() yielded path 'imos-data-lab-optimised/testing/anmn_ctd_ts_fv01.parquet/site_code=SYD140/timestamp=1625097600/polygon=01030000000100000005000000000000000020624000000000008041C0000000000060634000000000008041C0000000000060634000000000000039C0000000000020624000000000000039C0000000000020624000000000008041C0/IMOS_ANMN-NSW_CDSTZ_20210429T015500Z_SYD140_FV01_SYD140-2104-SBE37SM-RS232-128_END-20210812T011500Z_C-20210827T074819Z.nc-0.parquet', which is outside base dir 's3://imos-data-lab-optimised/testing/anmn_ctd_ts_fv01.parquet/' + # obviously the file to delete is found with the unittests, but there is an issue, maybe with the way filesystem is set. Reading with pandas works, but we don't have the same capabilities parquet_files = pq.ParquetDataset( - self.cloud_optimised_output_path, partitioning="hive" + self.cloud_optimised_output_path, + partitioning="hive", + filesystem=self.s3_fs, ) - except FileNotFoundError as e: + except Exception as e: self.logger.info(f"No files to delete: {e}") return # Define the regex pattern to match existing parquet files - pattern = rf"\/{self.filename}" + pattern = rf"\/{filename}" # Find files matching the pattern using list comprehension and regex matching_files = [ @@ -835,33 +845,62 @@ def delete_existing_matching_parquet(self) -> None: f"Previous parquet objects successfully deleted: {response}" ) - def to_cloud_optimised(self) -> None: + def to_cloud_optimised_single(self, s3_file_uri) -> None: """ - Create Parquet files from a NetCDF file. + Process a single NetCDF file from an S3 URI, converting it into Parquet format. + + Args: + s3_file_uri (str): The S3 URI of the NetCDF file to process. Returns: None + + This method processes a NetCDF file located at `s3_file_uri`: + - Logs the processing start. 
+ - Deletes existing matching Parquet files if enabled (`self.delete_pq_unmatch_enable`). + - Creates a fileset from the S3 file URI. + - Calls `self.preprocess_data()` to preprocess the data, yielding DataFrame and Dataset. + - Publishes the cloud-optimised data using `self.publish_cloud_optimised()`. + - Performs post-processing tasks using `self.postprocess()`. + - Logs completion time and finalises the process. + + If any exception occurs during processing, it logs the error and raises the exception. + + Note: + - Uses the logger defined in `self.logger`. + - Uses configurations and settings from `self.dataset_config`. """ - if self.tmp_input_file.endswith(".nc"): - self.is_valid_netcdf( - self.tmp_input_file - ) # check file validity before doing anything else + # FIXME: the next 2 lines need to be here otherwise, the logging is lost when called within a dask task. Why?? + logger_name = self.dataset_config.get("logger_name", "generic") + self.logger = get_logger(logger_name) + + self.logger.info(f"Processing {s3_file_uri}") + filename = os.path.basename(s3_file_uri) if self.delete_pq_unmatch_enable: - self.delete_existing_matching_parquet() + self.delete_existing_matching_parquet(filename) try: - generator = self.preprocess_data(self.tmp_input_file) + start_time = timeit.default_timer() + + s3_file_handle = create_fileset(s3_file_uri, self.s3_fs)[0] # only one file + + generator = self.preprocess_data(s3_file_handle) for df, ds in generator: - self.publish_cloud_optimised(df, ds) - self.push_metadata_aws_registry() + self.publish_cloud_optimised(df, ds, s3_file_handle) + # self.push_metadata_aws_registry() # Deprecated time_spent = timeit.default_timer() - self.start_time self.logger.info(f"Cloud Optimised file completed in {time_spent}s") self.postprocess(ds) + time_spent = timeit.default_timer() - start_time + self.logger.info( + f"{s3_file_uri} Cloud Optimised file completed in {time_spent}s" + ) + except Exception as e: self.logger.error( f"Issue while 
creating Cloud Optimised file: {type(e).__name__}: {e} \n {traceback.print_exc()}" @@ -871,3 +910,63 @@ def to_cloud_optimised(self) -> None: self.postprocess(ds) raise e + + def to_cloud_optimised(self, s3_file_uri_list) -> None: + """ + Process a list of NetCDF files from S3 URIs, converting them into Parquet format in batches. + + Args: + s3_file_uri_list (list): List of S3 URIs of NetCDF files to process. + + Returns: + None + + This method processes a list of NetCDF files located at `s3_file_uri_list`: + - Deletes existing Parquet files if `self.clear_existing_data` is set to True. + - Logs deletion of existing Parquet files if they exist. + - Creates a Dask cluster and submits tasks to process each file URI in batches. + - Waits for batch tasks to complete using a timeout of 10 minutes. + - Closes the Dask cluster after all tasks are completed. + + Note: + - Uses the logger defined in `self.logger`. + - Uses configurations and settings from `self.dataset_config`. + """ + if self.clear_existing_data: + self.logger.info( + f"Creating new Parquet dataset - DELETING existing all Parquet objects if exist" + ) + if prefix_exists(self.cloud_optimised_output_path): + bucket_name, prefix = split_s3_path(self.cloud_optimised_output_path) + self.logger.info( + f"Deleting existing Parquet objects from {self.cloud_optimised_output_path}" + ) + delete_objects_in_prefix(bucket_name, prefix) + + def task(f, i): + try: + self.to_cloud_optimised_single(f) + except Exception as e: + self.logger.error(f"{i}/{len(s3_file_uri_list)} issue with {f}: {e}") + + client, cluster = self.create_cluster() + + # Get the minimum cluster worker value as a batch size? and multiply it by 2 ? + n_workers_list = self.dataset_config.get("cluster_options", {}).get( + "n_workers", [] + ) + + # Get the minimum value from n_workers list + min_n_workers = min(n_workers_list) if n_workers_list else None + batch_size = min_n_workers * 3 + + # Do it in batches. 
maybe more efficient + for i in range(0, len(s3_file_uri_list), batch_size): + batch = s3_file_uri_list[i : i + batch_size] + batch_tasks = [ + client.submit(task, f, idx + 1) for idx, f in enumerate(batch) + ] + + wait(batch_tasks, timeout="10 minutes") + + self.close_cluster(client, cluster) diff --git a/aodn_cloud_optimised/lib/GenericZarrHandler.py b/aodn_cloud_optimised/lib/GenericZarrHandler.py index d1f7c8f..df28bff 100644 --- a/aodn_cloud_optimised/lib/GenericZarrHandler.py +++ b/aodn_cloud_optimised/lib/GenericZarrHandler.py @@ -1,23 +1,64 @@ import importlib.resources -import timeit -import traceback +import os +import warnings from functools import partial import boto3 +import dask import fsspec import numpy as np import s3fs import xarray as xr import zarr - -# from dask import distributed from dask.diagnostics import ProgressBar -from dask.distributed import worker_client from dask.distributed import Client - from rechunker import rechunk +from xarray.core.merge import MergeError + +from aodn_cloud_optimised.lib.CommonHandler import CommonHandler +from aodn_cloud_optimised.lib.logging import get_logger +from aodn_cloud_optimised.lib.s3Tools import ( + delete_objects_in_prefix, + split_s3_path, + prefix_exists, + create_fileset, +) + + +def preprocess_xarray(ds, dataset_config): + """ + Perform preprocessing on the input dataset (`ds`) and return an xarray Dataset. + + :param ds: Input xarray Dataset. + :param dataset_config: Configuration dictionary for the dataset. + + :return: + Preprocessed xarray Dataset. + """ + # TODO: this is part a rewritten function available in the GenericHandler class below. + # running the class method with xarray as preprocess=self.preprocess_xarray lead to many issues + # 1) serialization of the arguments with pickle. + # 2) Running in a dask remote cluster, it seemed like the preprocess function (if donne within mfdataset) + # was actually running locally and using ALL of the local ram. Complete nonsense. 
So this function was made + # as a test. It should be run after the xarray dataset is opened. More testing required as + # self.preprocess_xarray() was pretty complete function. + + logger_name = dataset_config.get("logger_name", "generic") + dimensions = dataset_config.get("dimensions") + schema = dataset_config.get("schema") + + logger = get_logger(logger_name) -from .CommonHandler import CommonHandler + # TODO: get filename; Should be from https://github.com/pydata/xarray/issues/9142 + + # ds = ds.assign( + # filename=((dimensions["time"]["name"],), [filename]) + # ) # add new filename variable with time dimension + + vars_to_drop = set(ds.data_vars) - set(schema) + ds_filtered = ds.drop_vars(vars_to_drop) + ds = ds_filtered + return ds class GenericHandler(CommonHandler): @@ -27,10 +68,12 @@ def __init__(self, **kwargs): Args: **kwargs: Additional keyword arguments. - raw_bucket_name (str, optional[config]): Name of the raw bucket. optimised_bucket_name (str, optional[config]): Name of the optimised bucket. root_prefix_cloud_optimised_path (str, optional[config]): Root Prefix path of the location of cloud optimised files - input_object_key (str): Key of the input object. + + Inherits: + CommonHandler: Provides common functionality for handling cloud-optimised datasets. + """ super().__init__(**kwargs) @@ -43,10 +86,6 @@ def __init__(self, **kwargs): json_validation_path ) # we cannot validate the json config until self.dataset_config and self.logger are set - self.reprocess = kwargs.get( - "reprocess", None - ) # setting to True will recreate the zarr from scratch at every run! 
- self.dimensions = self.dataset_config.get("dimensions") self.rechunk_drop_vars = kwargs.get("rechunk_drop_vars", None) self.vars_to_drop_no_common_dimension = self.dataset_config.get( @@ -54,57 +93,35 @@ def __init__(self, **kwargs): ) self.chunks = { + self.dimensions["time"]["name"]: self.dimensions["time"]["chunk"], self.dimensions["latitude"]["name"]: self.dimensions["latitude"]["chunk"], self.dimensions["longitude"]["name"]: self.dimensions["longitude"]["chunk"], } - def check_file_already_processed(self) -> bool: - """ - Check whether a NetCDF file has been previously processed and integrated into an existing Zarr dataset. - This check is performed by examining the filename variable added to the Zarr dataset. - - If the file has been processed previously, a self.reprocessed_time_idx variable will be created - to determine the index value of the time variable region for potential overwriting. - - :returns: - - True if the filename has already been integrated. - - False if the filename has not been integrated yet. - """ - self.logger.info( - f"{self.filename}: Checking if input NetCDF has already been ingested into Zarr dataset" - ) - - # Load existing zarr dataset - try: - ds = xr.open_zarr( - fsspec.get_mapper(self.cloud_optimised_output_path, anon=True), - consolidated=True, + self.compute = bool(True) + + # TODO: fix this ugly abomination. Unfortunately, patching the s3_fs value in the unittest is not enough for + # zarr! why? it works fine for parquet, but if I remove this if condition, my unittest breaks! maybe + # self.s3_fs is overwritten somewhere?? 
need to check + if os.getenv("RUNNING_UNDER_UNITTEST") == "true": + self.s3_fs = s3fs.S3FileSystem( + anon=False, + client_kwargs={ + "endpoint_url": "http://127.0.0.1:5555/", + "region_name": "us-east-1", + }, ) - except Exception as e: - self.logger.warning(f"Zarr dataset does not exist") - return False - # Locate values of time indexes where new filename has possibly been already downloaded - idx = ds.indexes[self.dimensions["time"]["name"]].where( - ds.filename == self.filename + self.store = s3fs.S3Map( + root=f"{self.cloud_optimised_output_path}", s3=self.s3_fs, check=False ) - not_nan_mask = ~idx.isna() # ~np.isnan(idx) - # Use numpy.where to get the indices where the values are not NaN - indices_not_nan = np.where(not_nan_mask)[0] - if indices_not_nan.size == 1: # filename exists, file part of existing zarr - self.reprocessed_time_idx = indices_not_nan[0] - return True - - elif indices_not_nan.size == 0: - return False - - def preprocess_xarray(self, ds, filename) -> xr.Dataset: + # TODO: Unused at the moment + def preprocess_xarray(self, ds) -> xr.Dataset: """ Perform preprocessing on the input dataset (`ds`) and return an xarray Dataset. :param ds: Input xarray Dataset. - :param filename: Name of the file being processed. :return: Preprocessed xarray Dataset. 
@@ -116,15 +133,17 @@ def preprocess_xarray(self, ds, filename) -> xr.Dataset: ds_filtered = ds.drop_vars(vars_to_drop) ds = ds_filtered - # add a new filename variable - filename = self.filename + # https://github.com/pydata/xarray/issues/2313 + # filename = ds.encoding["source"] - self.logger.info(f"{self.filename}: xarray preprocessing") + # self.logger.info(f"{filename}: xarray preprocessing") # Add a new dimension 'filename' with a filename value - ds = ds.assign( - filename=((self.dimensions["time"]["name"],), [filename]) - ) # add new filename variable with time dimension + filename = None + if filename is not None: + ds = ds.assign( + filename=((self.dimensions["time"]["name"],), [filename]) + ) # add new filename variable with time dimension var_required = self.schema.copy() var_required.pop(self.dimensions["time"]["name"]) @@ -134,8 +153,6 @@ def preprocess_xarray(self, ds, filename) -> xr.Dataset: # TODO: make the variable below something more generic? a parameter? var_template_shape = self.dataset_config.get("var_template_shape") - import warnings - try: warnings.filterwarnings("error", category=RuntimeWarning) nan_array = np.full(ds[var_template_shape].shape, np.nan, dtype=np.float64) @@ -150,9 +167,10 @@ def preprocess_xarray(self, ds, filename) -> xr.Dataset: # if variable doesn't exist if variable_name not in ds: - self.logger.warning( - f"{self.filename}: add missing {variable_name} to xarray dataset" - ) + # self.logger.warning( + # f"{filename}: add missing {variable_name} to xarray dataset" + # ) + self.logger.warning(f"add missing {variable_name} to xarray dataset") # check the type of the variable (numerical of string) if np.issubdtype(datatype, np.number): @@ -200,141 +218,262 @@ def preprocess_xarray(self, ds, filename) -> xr.Dataset: return ds - def preprocess(self) -> xr.Dataset: + def publish_cloud_optimised_fileset_batch(self, s3_file_uri_list): """ - Create a dataframe and xarray data from a NetCDF file. Loaded in memory. 
+ Process and publish a batch of NetCDF files stored in S3 to a Zarr dataset. - :return: - ds: xarray Dataset. - """ - - preproc = partial(self.preprocess_xarray, filename=self.filename) - ds = xr.open_mfdataset( - self.tmp_input_file, - preprocess=preproc, - engine="h5netcdf", - concat_characters=True, - mask_and_scale=True, - decode_cf=True, - decode_times=True, - use_cftime=True, - parallel=True, - # autoclose=True, - decode_coords=True, - ) + This method iterates over a list of S3 file URIs, processes them in batches, and publishes + the resulting datasets to a Zarr store on S3. It performs the following steps: - return ds + 1. Validate input parameters and initialise logging. + 2. Create a list of file handles from S3 file URIs. + 3. Iterate through batches of file handles. + 4. Perform preprocessing on each batched dataset. + 5. Drop specified variables from the dataset based on schema settings. + 6. Open and preprocess each dataset using Dask for parallel processing. + 7. Chunk the dataset according to predefined dimensions. + 8. Write the processed dataset to an existing or new Zarr store on S3. + 9. Handle merging datasets and logging errors if encountered. - def publish_cloud_optimised(self, ds): - """ - Create or update a Zarr dataset in the specified S3 bucket. + Parameters: + - s3_file_uri_list (list): List of S3 file URIs to process and publish. - :param ds: The xarray dataset to be stored in Zarr format. - :type ds: xr.Dataset + Raises: + - ValueError: If input_objects (`s3_file_uri_list`) is not defined. 
- :return: None + Returns: + None """ - s3 = s3fs.S3FileSystem(anon=False) + # Iterate over s3_file_handle_list in batches + if s3_file_uri_list is None: + raise ValueError("input_objects is not defined") - store = s3fs.S3Map( - root=f"{self.cloud_optimised_output_path}", s3=s3, check=False + self.logger.info( + "Listing all objects to process and create a s3_file_handle_list" ) + s3_file_handle_list = create_fileset(s3_file_uri_list, self.s3_fs) - ds = ds.chunk(chunks=self.chunks) + time_dimension_name = self.dimensions["time"]["name"] - # first file of the dataset (overwrite) - if self.reprocess: - self.logger.warning( - f"{self.filename}: Creating new Zarr dataset - OVERWRITTING existing all Zarr objects if exist" - ) + for idx, batch_files in enumerate( + self.batch_process_fileset(s3_file_handle_list) + ): + self.logger.info(f"Processing batch {idx + 1}...") + self.logger.info(batch_files) + + # batch_filenames = [os.path.basename(f.full_name) for f in batch_files] - write_job = ds.to_zarr( - store, - write_empty_chunks=False, - mode="w", - compute=False, - consolidated=True, + partial_preprocess = partial( + preprocess_xarray, dataset_config=self.dataset_config ) - # append new files to the dataset - else: - self.logger.info(f"{self.filename}: append data to existing Zarr") - if ( - self.check_file_already_processed() - ): # case when a file should be reprocessed and write to a specific region - self.logger.info( - f"{self.filename}: update time region at slice({self.reprocessed_time_idx} , {self.reprocessed_time_idx + 1}) with new NetCDF data" - ) - # when setting `region` explicitly in to_zarr(), all variables in the dataset to write - # must have at least one dimension in common with the region's dimensions ['TIME'], - # but that is not the case for some variables here. 
To drop these variables - # from this dataset before exporting to zarr, write: - # .drop_vars(['LATITUDE', 'LONGITUDE', 'GDOP']) - - write_job = ds.drop_vars(self.vars_to_drop_no_common_dimension).to_zarr( - store, - write_empty_chunks=False, - region={ - self.dimensions["time"]["name"]: slice( - self.reprocessed_time_idx, self.reprocessed_time_idx + 1 + drop_vars_list = [ + var_name + for var_name, attrs in self.schema.items() + if attrs.get("drop_vars", False) + ] + self.logger.warning(f"Dropping variables {drop_vars_list} from dataset") + + with dask.config.set( + **{ + "array.slicing.split_large_chunks": False, + "distributed.scheduler.worker-saturation": "inf", + } + ): + try: + # TODO: if using preprocess function within mfdataset (has to be outside the class otherwise parallelizing issues), the + # local ram is being used! and not the cluster one! even if the function only does return ds + # solution, open at the end with ds = preprocess(ds) afterwards + # + ds = xr.open_mfdataset( + batch_files, + engine="h5netcdf", + parallel=True, + # preprocess=partial_preprocess, # this sometimes hangs the process + concat_characters=True, + mask_and_scale=True, + decode_cf=True, + decode_times=True, + use_cftime=True, + decode_coords=True, + compat="override", + coords="minimal", + data_vars="minimal", + drop_variables=drop_vars_list, + ) + + # TODO: create a simple jupyter notebook 2 show 2 different problems: + # 1) serialization issue if preprocess is within a class + # 2) blowing of memory if preprocess function is outside of a class and only does return ds + + ds = preprocess_xarray(ds, self.dataset_config) + + # NOTE: if I comment the next line, i get some errors with the latest chunk for some variables + ds = ds.chunk( + chunks=self.chunks + ) # careful with chunk size, had an issue + + # Write the dataset to Zarr + if prefix_exists(self.cloud_optimised_output_path): + self.logger.info(f"append data to existing Zarr") + + # NOTE: In the next section, we need to 
figure out if we're reprocessing existing data. + # For this, the logic is open the original zarr store and compare with the new ds from + # this batch if they have time values in common. + # If this is the case, we need then to find the CONTIGUOUS regions as we can't assume that + # the data is well ordered. The logic below is looking for the matching regions and indexes + + ds_org = xr.open_zarr( + self.store, + consolidated=True, + decode_cf=True, + decode_times=True, + use_cftime=True, + decode_coords=True, ) - }, - compute=True, - consolidated=True, - ) - else: - write_job = ds.to_zarr( - store, - write_empty_chunks=False, - mode="a", - compute=True, - append_dim=self.dimensions["time"]["name"], - consolidated=True, - ) - # write_job = write_job.persist() - # distributed.progress(write_job, notebook=False) - self.logger.info( - f"{self.filename}: Zarr created and pushed to {self.cloud_optimised_output_path} successfully" - ) + time_values_org = ds_org[time_dimension_name].values + time_values_new = ds[time_dimension_name].values - def to_cloud_optimised(self): - """ - Create a Zarr dataset from NetCDF data. + # Find common time values + common_time_values = np.intersect1d( + time_values_org, time_values_new + ) - Returns: - None + # Handle the 2 scenarios, reprocessing of a batch, or append new data + if len(common_time_values) > 0: + self.logger.info( + f"Duplicate values of {self.dimensions['time']['name']}" + ) + # Get indices of common time values in the original dataset + common_indices = np.nonzero( + np.isin(time_values_org, common_time_values) + )[0] + + # regions must be CONTIGIOUS!! very important. 
so looking for different regions + # Define regions as slices for the common time values + regions = [] + matching_indexes = [] + + start = common_indices[0] + for i in range(1, len(common_indices)): + if common_indices[i] != common_indices[i - 1] + 1: + end = common_indices[i - 1] + regions.append( + {time_dimension_name: slice(start, end + 1)} + ) + matching_indexes.append( + np.where( + np.isin( + time_values_new, + time_values_org[start : end + 1], + ) + )[0] + ) + start = common_indices[i] + + # Append the last region + end = common_indices[-1] + regions.append({time_dimension_name: slice(start, end + 1)}) + matching_indexes.append( + np.where( + np.isin( + time_values_new, + time_values_org[start : end + 1], + ) + )[0] + ) + + # Process region by region if necessary + for region, indexes in zip(regions, matching_indexes): + self.logger.info( + f"Overwriting Zarr dataset in Region: {region}, Matching Indexes in new ds: {indexes}" + ) + ds.isel(**{time_dimension_name: indexes}).drop_vars( + self.vars_to_drop_no_common_dimension + ).to_zarr( + self.store, + write_empty_chunks=False, + region=region, + compute=True, + consolidated=True, + ) + + # No reprocessing needed + else: + self.logger.info(f"Appending data to Zarr dataset") + + ds.to_zarr( + self.store, + mode="a", # append mode for the next batches + write_empty_chunks=False, # TODO: could True fix the issue when some variables dont exists? I doubt + compute=True, # Compute the result immediately + consolidated=True, + append_dim=time_dimension_name, + ) + + # First time writing the dataset + else: + self.logger.info(f"Writing data to new Zarr dataset") + + ds.to_zarr( + self.store, + mode="w", # Overwrite mode for the first batch + write_empty_chunks=False, + compute=True, # Compute the result immediately + consolidated=True, + ) - This method creates a Zarr dataset from NetCDF data. 
It logs the process, - creates a dataset using the 'preprocess' method, and populates the Zarr dataset - using the 'publish_cloud_optimised' method. After completion, the temporary NetCDF file - is removed. The total time taken for the operation is logged. + self.logger.info( + f"Batch {idx + 1} processed and written to {self.store}" + ) - Note: The 'preprocess' and 'publish_cloud_optimised' methods are assumed to be defined within the class. - """ + except MergeError as e: + self.logger.error(f"Failed to merge datasets: {e}") + if "ds" in locals(): + self.postprocess(ds) - if self.tmp_input_file.endswith(".nc"): - self.is_valid_netcdf( - self.tmp_input_file - ) # check file validity before doing anything else + except Exception as e: + self.logger.error(f"An unexpected error occurred: {e}") + if "ds" in locals(): + self.postprocess(ds) - try: - ds = self.preprocess() - self.publish_cloud_optimised(ds) - self.push_metadata_aws_registry() + def to_cloud_optimised(self, s3_file_uri_list=None): + """ + Create a Zarr dataset from NetCDF data. - time_spent = timeit.default_timer() - self.start_time - self.logger.info(f"Cloud Optimised file completed in {time_spent}s") + This method creates a Zarr dataset from NetCDF data stored in S3. It logs the process, + deletes existing Zarr objects if specified, processes multiple files concurrently using a cluster, + and publishes the resulting datasets using the 'publish_cloud_optimised_fileset_batch' method. - self.postprocess(ds) + Note: - except Exception as e: - self.logger.error( - f"Issue while creating Cloud Optimised file: {type(e).__name__}: {e} \n {traceback.print_exc()}" + Args: + - s3_file_uri_list (list, optional): List of S3 file URIs to process and create the Zarr dataset. + If not provided, no processing is performed. 
+ + Returns: + None + """ + if self.clear_existing_data: + self.logger.warning( + f"Creating new Zarr dataset - DELETING existing all Zarr objects if exist" ) + # TODO: delete all objects + if prefix_exists(self.cloud_optimised_output_path): + bucket_name, prefix = split_s3_path(self.cloud_optimised_output_path) + self.logger.info( + f"Deleting existing Zarr objects from {self.cloud_optimised_output_path}" + ) + + delete_objects_in_prefix(bucket_name, prefix) - if "ds" in locals(): - self.postprocess(ds) + # Multiple file processing with cluster + if s3_file_uri_list is not None: + # creating a cluster to process multiple files at once + self.client, self.cluster = self.create_cluster() + self.publish_cloud_optimised_fileset_batch(s3_file_uri_list) + self.close_cluster(self.client, self.cluster) @staticmethod def filter_rechunk_dimensions(dimensions): @@ -383,7 +522,7 @@ def rechunk(self, max_mem="8.0GB"): self.dimensions ) # only return a dict with the dimensions to rechunk - s3 = s3fs.S3FileSystem(anon=False) + # s3 = s3fs.S3FileSystem(anon=False) org_url = ( self.cloud_optimised_output_path @@ -393,7 +532,7 @@ def rechunk(self, max_mem="8.0GB"): target_url = org_url.replace( f"{self.dataset_name}", f"{self.dataset_name}_rechunked" ) - target_store = s3fs.S3Map(root=f"{target_url}", s3=s3, check=False) + target_store = s3fs.S3Map(root=f"{target_url}", s3=self.s3_fs, check=False) # zarr.consolidate_metadata(org_store) ds = xr.open_zarr(fsspec.get_mapper(org_url, anon=True), consolidated=True) @@ -402,7 +541,7 @@ def rechunk(self, max_mem="8.0GB"): f"{self.dataset_name}", f"{self.dataset_name}_intermediate" ) - temp_store = s3fs.S3Map(root=f"{temp_url}", s3=s3, check=False) + temp_store = s3fs.S3Map(root=f"{temp_url}", s3=self.s3_fs, check=False) # delete previous version of intermediate and rechunked data s3_client = boto3.resource("s3") diff --git a/aodn_cloud_optimised/lib/ParquetDataQuery.py b/aodn_cloud_optimised/lib/ParquetDataQuery.py index b0ae39c..af31584 
100644 --- a/aodn_cloud_optimised/lib/ParquetDataQuery.py +++ b/aodn_cloud_optimised/lib/ParquetDataQuery.py @@ -6,7 +6,9 @@ import os import re from datetime import datetime +from functools import lru_cache +import boto3 import geopandas as gpd import matplotlib.pyplot as plt import numpy as np @@ -14,6 +16,10 @@ import pyarrow as pa import pyarrow.compute as pc import pyarrow.parquet as pq +import pyarrow.parquet as pq +from botocore import UNSIGNED +from botocore.client import Config +from fuzzywuzzy import fuzz from shapely import wkb from shapely.geometry import Polygon, MultiPolygon @@ -300,3 +306,200 @@ def get_schema_metadata(dname): for key, value in parquet_meta.metadata.items() } return decoded_meta + + +#################################################################################################################### +# Work done during IMOS HACKATHON 2024 +# https://github.com/aodn/IMOS-hackathon/blob/main/2024/Projects/CARSv2/notebooks/get_aodn_example_hackathon.ipynb +################################################################################################################### +class GetAodn: + def __init__(self): + self.bucket_name = "imos-data-lab-optimised" + self.prefix = "cloud_optimised/cluster_testing" + + def get_dataset(self, dataset_name): + return Dataset(self.bucket_name, self.prefix, dataset_name) + + def get_metadata(self): + return Metadata(self.bucket_name, self.prefix) + + +class Dataset: + def __init__(self, bucket_name, prefix, dataset_name): + self.bucket_name = bucket_name + self.prefix = prefix + self.dataset_name = dataset_name + self.dname = ( + f"s3://{self.bucket_name}/{self.prefix}/{self.dataset_name}.parquet/" + ) + self.parquet_ds = pq.ParquetDataset(self.dname, partitioning="hive") + + def partition_keys_list(self): + dataset = pq.ParquetDataset(self.dname, format="parquet", partitioning="hive") + partition_keys = dataset.partitioning.schema + return partition_keys + + def get_spatial_extent(self): + return 
get_spatial_extent(self.parquet_ds) + + def plot_spatial_extent(self): + return plot_spatial_extent(self.parquet_ds) + + def get_temporal_extent(self): + return get_temporal_extent(self.parquet_ds) + + def get_data( + self, + date_start=None, + date_end=None, + lat_min=None, + lat_max=None, + lon_min=None, + lon_max=None, + scalar_filter=None, + ): + # TODO fix the whole logic as not everything is considered + + # time filter: doesnt require date_end + if date_end == None: + now = datetime.now() + date_end = now.strftime("%Y-%m-%d %H:%M:%S") + + if date_start == None: + filter_time = None + else: + filter_time = create_time_filter( + self.parquet_ds, date_start=date_start, date_end=date_end + ) + + # Geometry filter requires ALL optional args to be defined + if lat_min == None or lat_max == None or lon_min == None or lon_max == None: + filter_geo = None + else: + filter_geo = create_bbox_filter( + self.parquet_ds, + lat_min=lat_min, + lat_max=lat_max, + lon_min=lon_min, + lon_max=lon_max, + ) + + # scalar filter + if scalar_filter != None: + expr = None + for item in scalar_filter: + expr_1 = pc.field(item) == pa.scalar(scalar_filter[item]) + if type(expr) != pc.Expression: + expr = expr_1 + else: + expr = expr_1 & expr + + # merge filters together + if type(filter_time) != pc.Expression: + data_filter = filter_geo + elif type(filter_geo) != pc.Expression: + data_filter = filter_time + elif (type(filter_geo) != pc.Expression) & (type(filter_time) != pc.Expression): + data_filter = None + else: + data_filter = filter_geo & filter_time + + # add scalar filter to data_filter + if scalar_filter != None: + data_filter = data_filter & expr + + df = pd.read_parquet(self.dname, engine="pyarrow", filters=data_filter) + return df + + def get_metadata(self): + return get_schema_metadata(self.dname) + + +class Metadata: + def __init__(self, bucket_name, prefix): + # super().__init__() + # initialise the class by calling the needed methods + self.bucket_name = bucket_name + 
self.prefix = prefix + self.catalog = self.metadata_catalog() + + def metadata_catalog_uncached(self): + # print('Running metadata_catalog_uncached...') # Debug output + + folders_with_parquet = self.list_folders_with_parquet() + catalog = {} + + for dataset in folders_with_parquet: + dname = f"s3://{self.bucket_name}/{dataset}" + metadata = get_schema_metadata(dname) # schema metadata + + path_parts = dataset.strip("/").split("/") + last_folder_with_extension = path_parts[-1] + dataset_name = os.path.splitext(last_folder_with_extension)[0] + + catalog[dataset_name] = metadata + + return catalog + + @lru_cache(maxsize=None) + def metadata_catalog(self): + # print('Running metadata_catalog...') # Debug output + if "catalog" in self.__dict__: + return self.catalog + else: + return self.metadata_catalog_uncached() + + def list_folders_with_parquet(self): + s3 = boto3.client("s3", config=Config(signature_version=UNSIGNED)) + prefix = self.prefix + + if not prefix.endswith("/"): + prefix += "/" + + response = s3.list_objects_v2( + Bucket=self.bucket_name, Prefix=prefix, Delimiter="/" + ) + + folders = [] + for prefix in response.get("CommonPrefixes", []): + folder_path = prefix["Prefix"] + if folder_path.endswith(".parquet/"): + folder_name = folder_path[len(prefix) - 1 :] + folders.append(folder_name) + + return folders + + def find_datasets_with_attribute( + self, target_value, target_key="standard_name", data_dict=None, threshold=80 + ): + + matching_datasets = [] + # https://stackoverflow.com/questions/56535948/python-why-cant-you-use-a-self-variable-as-an-optional-argument-in-a-method + if data_dict == None: + data_dict = self.metadata_catalog() + + if not isinstance(data_dict, dict): + return matching_datasets # handle bad cases + + for dataset_name, attributes in data_dict.items(): + if not isinstance(attributes, dict): + continue + + for key, value in attributes.items(): + if isinstance(value, dict) and target_key in value: + # Check for any attribute available 
in a dict(catalog) match using fuzzy matching + current_standard_name = value.get(target_key, "") + similarity_score = fuzz.partial_ratio( + target_value.lower(), current_standard_name.lower() + ) + if similarity_score >= threshold: + matching_datasets.append( + dataset_name + ) # Add dataset name to list + + # Recursively search + matching_datasets.extend( + self.find_datasets_with_attribute(value, target_value, threshold) + ) + + return list(set(matching_datasets)) diff --git a/aodn_cloud_optimised/lib/config.py b/aodn_cloud_optimised/lib/config.py index 350de8c..c9b5a7b 100644 --- a/aodn_cloud_optimised/lib/config.py +++ b/aodn_cloud_optimised/lib/config.py @@ -1,49 +1,128 @@ import json import yaml import os -import importlib.resources from collections import OrderedDict from importlib.resources import path -def load_variable_from_file(file_path, variable_name) -> str: +def merge_dicts(parent, child): + """ + Merge two dictionaries, giving priority to the child dictionary. + + :param parent: The parent dictionary. + :param child: The child dictionary whose values will override those of the parent. + :return: The merged dictionary with child's values taking precedence. + """ + for key, value in child.items(): + if isinstance(value, dict) and key in parent: + parent[key] = merge_dicts(parent[key], value) + else: + parent[key] = value + return parent + + +def load_config(file_path): + """ + Load a configuration file in either YAML or JSON format. + + :param file_path: Path to the configuration file. + :return: The loaded configuration as a dictionary. + :raises ValueError: If the file format is unsupported. + :raises FileNotFoundError: If the file is not found. 
+ """ try: with open(file_path, "r") as file: if file_path.endswith(".yaml"): - variables = yaml.safe_load(file) + return yaml.safe_load(file) elif file_path.endswith(".json"): - variables = json.load(file, object_pairs_hook=OrderedDict) + return json.load( + file + ) # remove this as it's breaking the metadata for parquet, object_pairs_hook=OrderedDict) else: raise ValueError( "Unsupported file format. Please provide either a YAML or JSON file." ) - - if variable_name in variables: - return variables[variable_name] - else: - raise KeyError( - f"Variable '{variable_name}' not found in the file '{file_path}'." - ) except FileNotFoundError: raise FileNotFoundError(f"File '{file_path}' not found.") +def load_variable_from_file(file_path, variable_name) -> str: + """ + Load a specific variable from a configuration file, considering parent configurations if specified. + + :param file_path: Path to the configuration file. + :param variable_name: Name of the variable to retrieve. + :return: The value of the specified variable. + :raises KeyError: If the variable is not found in the configuration file. + """ + # Load the child configuration + variables = load_config(file_path) + + # Check for a parent configuration file + parent_config_path = variables.get("parent_config") + if parent_config_path: + # Construct the full path to the parent configuration file + parent_config_path = os.path.join( + os.path.dirname(file_path), parent_config_path + ) + # Load the parent configuration + parent_variables = load_config(parent_config_path) + # Merge the parent and child configurations + variables = merge_dicts(parent_variables, variables) + + # Retrieve the variable + if variable_name in variables: + return variables[variable_name] + else: + raise KeyError( + f"Variable '{variable_name}' not found in the file '{file_path}'." + ) + + def load_variable_from_config(variable_name) -> str: + """ + Load a specific variable from the common configuration file. 
+ + :param variable_name: Name of the variable to retrieve. + :return: The value of the specified variable. + :raises KeyError: If the variable is not found in the configuration file. + """ # Obtain the file path using the context manager with path("aodn_cloud_optimised.config", "common.json") as common_config_path: return load_variable_from_file(str(common_config_path), variable_name) def load_dataset_config(config_path) -> dict: - try: - with open(config_path, "r") as file: - if config_path.endswith(".json"): - dataset_config = json.load(file) - else: - raise ValueError( - "Unsupported file format. Please provide either a YAML or JSON file." - ) + """ + Load a dataset configuration, considering parent configurations if specified. + + :param config_path: Path to the dataset configuration file. + :return: The loaded dataset configuration as a dictionary. + :raises FileNotFoundError: If the parent configuration file is not found. + :raises ValueError: If there is an error loading the parent configuration file. + """ + # Load the child configuration + dataset_config = load_config(config_path) + + # Check for a parent configuration file + parent_config_path = dataset_config.get("parent_config") + if parent_config_path: + # Construct the full path to the parent configuration file which is in the same directory + parent_config_path = os.path.join( + os.path.dirname(config_path), parent_config_path + ) + # Load the parent configuration + try: + parent_dataset_config = load_config(parent_config_path) + # Merge the parent and child configurations + dataset_config = merge_dicts(parent_dataset_config, dataset_config) + except FileNotFoundError: + raise FileNotFoundError( + f"Parent configuration file '{parent_config_path}' not found." 
+ ) + except ValueError as e: + raise ValueError( + f"Error loading parent configuration file '{parent_config_path}': {e}" + ) - return dataset_config - except Exception as e: - raise TypeError(f"{e}") + return dataset_config diff --git a/aodn_cloud_optimised/lib/s3Tools.py b/aodn_cloud_optimised/lib/s3Tools.py index a87fd3e..f8b2094 100755 --- a/aodn_cloud_optimised/lib/s3Tools.py +++ b/aodn_cloud_optimised/lib/s3Tools.py @@ -1,33 +1,58 @@ import boto3 +from urllib.parse import urlparse +import s3fs +import logging -def s3_ls(bucket, prefix, suffix=".nc") -> list: +def s3_ls(bucket, prefix, suffix=".nc", s3_path=True) -> list: """ Return a list of object keys under a specific prefix in the specified S3 bucket with the specified suffix. Args: - prefix (str): The prefix to filter objects in the S3 bucket. bucket (str): The name of the S3 bucket. + prefix (str): The prefix to filter objects in the S3 bucket. suffix (str, optional): The suffix to filter object keys (default is '.nc'). + s3_path (bool, optional): Whether to return S3 paths or object keys without the bucket name (default is True). Returns: list[str]: A list of object keys under the specified prefix and with the specified suffix. + If s3_path=True, returns list of S3 paths (s3://bucket_name/key). + If s3_path=False, returns list of object keys (key). 
""" + # Store the initial logger state + initial_logger = logging.getLogger() + + # Check if the root logger already has handlers + if not initial_logger.hasHandlers(): + # Set up logging configuration if no handlers exist + logging.basicConfig(level=logging.INFO) # Set the logging level as needed + + # Get the logger instance + logger = logging.getLogger() + + logger.info(f"Listing S3 objects in {bucket} under {prefix} ending with {suffix}") + s3 = boto3.client("s3") paginator = s3.get_paginator("list_objects_v2") pages = paginator.paginate(Bucket=bucket, Prefix=prefix) - s3_obj = [] + s3_objs = [] for page in pages: - for object in page["Contents"]: - if object["Key"].endswith(suffix): - s3_obj.append(object["Key"]) + for obj in page.get("Contents", []): + if obj["Key"].endswith(suffix): + if s3_path: + s3_objs.append(f"s3://{bucket}/{obj['Key']}") + else: + s3_objs.append(obj["Key"]) - s3.close() - return s3_obj + if not initial_logger.hasHandlers(): + # Restore the original state if no handlers were initially present + logging.shutdown() + + return s3_objs def delete_objects_in_prefix(bucket_name, prefix): @@ -58,6 +83,9 @@ def delete_objects_in_prefix(bucket_name, prefix): """ s3 = boto3.client("s3") + # Get the logger instance + logger = logging.getLogger() + # Continuation token for paginated results continuation_token = None @@ -75,7 +103,9 @@ def delete_objects_in_prefix(bucket_name, prefix): # Check if there are any objects to delete if "Contents" not in response: - print(f"No objects found with prefix '{prefix}' in bucket '{bucket_name}'.") + logger.info( + f"No objects found with prefix '{prefix}' in bucket '{bucket_name}'." 
+ ) return # Collect object keys to delete @@ -89,10 +119,83 @@ def delete_objects_in_prefix(bucket_name, prefix): }, ) - print(f"Deleted {len(delete_response['Deleted'])} objects.") + logger.info(f"Deleted {len(delete_response['Deleted'])} objects.") # Check if there are more objects to delete if response["IsTruncated"]: continuation_token = response["NextContinuationToken"] else: break + + +def split_s3_path(s3_path): + """ + Split an S3 path into bucket name and prefix. + + Args: + s3_path (str): The S3 path (e.g., 's3://bucket-name/path/to/object/'). + + Returns: + tuple: A tuple containing the bucket name and prefix. + """ + parsed_url = urlparse(s3_path) + bucket_name = parsed_url.netloc + prefix = parsed_url.path.lstrip("/") + return bucket_name, prefix + + +def prefix_exists(s3_path): + """ + Check if a given S3 prefix exists. + + This function parses an S3 path to extract the bucket name and prefix, + then checks if the prefix exists in the specified S3 bucket. + + Args: + s3_path (str): The S3 path to check, in the format "s3://bucket-name/prefix". + + Returns: + bool: True if the prefix exists, False otherwise. + + Raises: + ValueError: If the provided path does not appear to be an S3 URL. + + """ + # Parse the S3 path + parsed_url = urlparse(s3_path) + + if parsed_url.scheme != "s3": + raise ValueError("The provided path does not appear to be an S3 URL.") + + bucket_name = parsed_url.netloc + prefix = parsed_url.path.lstrip("/") + + s3_client = boto3.client("s3") + response = s3_client.list_objects_v2(Bucket=bucket_name, Prefix=prefix, MaxKeys=1) + return "Contents" in response + + +def create_fileset(s3_paths, s3_fs=None): + """ + Create a fileset from S3 objects specified by a list of full S3 paths. + + Args: + s3_paths (str or list[str]): Either a single full S3 path (e.g., 's3://bucket_name/object_key') + or a list of full S3 paths. + + Returns: + list[file-like object]: List of file-like objects representing each object in the fileset. 
+ """ + if s3_fs is None: + s3_fs = s3fs.S3FileSystem(anon=True) + + if isinstance(s3_paths, str): + s3_paths = [s3_paths] + + if not isinstance(s3_paths, list): + raise ValueError("Invalid input format. Expecting either str or list[str].") + + # Create a fileset by opening each file + fileset = [s3_fs.open(file) for file in s3_paths] + + return fileset diff --git a/aodn_cloud_optimised/lib/schema.py b/aodn_cloud_optimised/lib/schema.py index 7746386..23d5ae4 100755 --- a/aodn_cloud_optimised/lib/schema.py +++ b/aodn_cloud_optimised/lib/schema.py @@ -1,89 +1,100 @@ +import json +import tempfile + +import numpy as np import pyarrow as pa import s3fs import xarray as xr -import numpy as np -import json -import tempfile -def generate_pyarrow_schema_from_s3_netcdf(s3_object_address, sub_schema): +def custom_encoder(obj): + if isinstance(obj, np.float32): + return float(obj) # Convert np.float32 to Python float + raise TypeError(f"Object of type {type(obj)} is not JSON serializable") + + +def generate_json_schema_var_from_netcdf(nc_path, var_name, indent=2, s3_fs=None): """ - Extracts variable names and types from a NetCDF file in S3 and returns a PyArrow pyarrow_schema. + Extracts variable names, types, and attributes from a NetCDF file and returns a JSON-formatted schema. Args: - s3_object_address (str): The address of the NetCDF object in S3 format, - e.g., "s3://your-bucket/path/to/file.nc". + nc_path (str or S3File): Path to a local NetCDF file or S3 address of the NetCDF file, + e.g., "s3://your-bucket/path/to/file.nc", or an open S3File object. + var_name (str): Name of the variable or coordinate to extract schema for. + indent (int, optional): Number of spaces for JSON indentation (default is 2). + s3_fs (s3fs.S3FileSystem, optional): S3FileSystem instance used to open S3 objects (default is None). Returns: - pyarrow.Schema: The inferred PyArrow pyarrow_schema from the NetCDF file. + str: JSON-formatted string representing the variable schema. 
""" - s3 = s3fs.S3FileSystem() - with s3.open(s3_object_address, "rb") as f: - dataset = xr.open_dataset(f) - - variables = list(dataset.variables.keys()) - types = [pa.from_numpy_dtype(dataset[var].dtype) for var in variables] - - # Create the base pyarrow_schema from the NetCDF file - base_schema = pa.schema(list(zip(variables, types))) + if isinstance(nc_path, s3fs.S3File): + if s3_fs is None: + s3_fs = s3fs.S3FileSystem(anon=True) + + # Open dataset from S3 file-like object using with statement + with s3_fs.open(nc_path) as f: + with xr.open_dataset(f) as dataset: + schema = extract_variable_schema(dataset, var_name) + elif nc_path.startswith("s3://"): + with s3_fs.open(nc_path) as f: + with xr.open_dataset(f) as dataset: + schema = extract_variable_schema(dataset, var_name) + else: + with xr.open_dataset(nc_path) as dataset: + schema = extract_variable_schema(dataset, var_name) - # Combine the base pyarrow_schema and the provided subschema - combined_schema = pa.unify_schemas([base_schema, sub_schema]) + json_str = json.dumps(schema, indent=indent, default=custom_encoder) - return combined_schema + return json_str -def generate_json_schema_var_from_netcdf(nc_path, var_name, indent=2): +def extract_variable_schema(dataset, var_name): """ - Extracts variable names, types, and attributes from a NetCDF file in S3 and prints a JSON pyarrow_schema. + Extracts variable schema (dtype and attributes) from an xarray Dataset or DataArray. Args: - s3_object_nc_pathaddress (str): The address of the NetCDF object in S3 format, - e.g., "s3://your-bucket/path/to/file.nc". - indent (int, optional): Number of spaces for JSON indentation (default is 2). + dataset (xarray.Dataset or xarray.DataArray): The xarray dataset or data array. + var_name (str): Name of the variable or coordinate to extract schema for. Returns: - None + dict: Dictionary representing the variable schema. 
""" - with open(nc_path, "rb") as f: - dataset = xr.open_dataset(f) - schema = {} # Process variables if var_name in dataset.variables: var_dtype = dataset.variables[var_name].dtype - dtype_str = convert_dtype_to_str(var_dtype) - var_attrs = extract_serialisable_attrs(dataset.variables[var_name].attrs) + dtype_str = str(var_dtype) + var_attrs = dataset.variables[var_name].attrs schema[var_name] = {"type": dtype_str, **var_attrs} elif var_name in dataset.coords: coord_dtype = dataset.coords[var_name].dtype - dtype_str = convert_dtype_to_str(coord_dtype) - coord_attrs = extract_serialisable_attrs(dataset.coords[var_name].attrs) + dtype_str = str(coord_dtype) + coord_attrs = dataset.coords[var_name].attrs schema[var_name] = {"type": dtype_str, **coord_attrs} - # Convert the pyarrow_schema dictionary to a JSON-formatted string with indentation - json_str = json.dumps(schema, indent=indent) - - # Print the JSON string with double quotes for easy copy/paste - return json_str + return schema -def generate_json_schema_from_s3_netcdf(s3_object_address, indent=2): +def generate_json_schema_from_s3_netcdf(s3_object_address, indent=2, s3_fs=None): """ - Extracts variable names, types, and attributes from a NetCDF file in S3 and prints a JSON pyarrow_schema. + Extracts variable names, types, and attributes from a NetCDF file in S3 and returns a JSON-formatted schema. Args: s3_object_address (str): The address of the NetCDF object in S3 format, e.g., "s3://your-bucket/path/to/file.nc". indent (int, optional): Number of spaces for JSON indentation (default is 2). + s3_fs (s3fs.S3FileSystem, optional): S3FileSystem instance used to open S3 objects (default is None). Returns: - None + str: Path to a temporary JSON file containing the variable schema. 
""" - s3 = s3fs.S3FileSystem() - with s3.open(s3_object_address, "rb") as f: + + if s3_fs is None: + s3_fs = s3fs.S3FileSystem(anon=True) + + with s3_fs.open(s3_object_address, "rb") as f: dataset = xr.open_dataset(f) schema = {} diff --git a/environment.yml b/environment.yml index 44b6df6..40654c3 100755 --- a/environment.yml +++ b/environment.yml @@ -11,7 +11,7 @@ dependencies: - notebook - h5py - scipy - - pip + - pip<24.1 - pip: - poetry - -r requirements.txt diff --git a/integration_testing/test_ardc_wave_nrt.py b/integration_testing/test_ardc_wave_nrt.py index 3e1b55a..3598e24 100755 --- a/integration_testing/test_ardc_wave_nrt.py +++ b/integration_testing/test_ardc_wave_nrt.py @@ -7,7 +7,7 @@ import pyarrow.dataset as pds import pyarrow.parquet as pq -from aodn_cloud_optimised.lib.CommonHandler import cloud_optimised_creation_loop +from aodn_cloud_optimised.lib.CommonHandler import cloud_optimised_creation from aodn_cloud_optimised.lib.ParquetDataQuery import * from aodn_cloud_optimised.lib.config import ( load_variable_from_config, @@ -49,7 +49,7 @@ def setUpClass(cls): ), ) - cloud_optimised_creation_loop( + cloud_optimised_creation( nc_obj_ls, dataset_config=dataset_config, raw_bucket_name=load_variable_from_config( diff --git a/notebooks/argo_core.ipynb b/notebooks/argo_core.ipynb index c00d8b8..c045c28 100644 --- a/notebooks/argo_core.ipynb +++ b/notebooks/argo_core.ipynb @@ -1,1153 +1,1156 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "Toi10WECQdzJ" - }, - "source": [ - "## Access ARGO Core data in Parquet\n", - "\n", - "A jupyter notebook to show how to access and plot ARGO Core data available as a [Parquet](https://parquet.apache.org) dataset on S3" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "id": "TJf1YgjtQdzS" - }, - "outputs": [], - "source": [ - "dataset_name = \"argo_core\"" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "u-FcvQ0UQdzW" - }, - "source": [ - "## 
Install/Update packages and Load common functions" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "id": "YB7J7Y8FQdzY", - "outputId": "cd691404-a147-4c3d-96f1-55e8d2e66427", - "colab": { - "base_uri": "https://localhost:8080/" - } - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Collecting s3fs\n", - " Downloading s3fs-2024.3.1-py3-none-any.whl (29 kB)\n", - "Collecting aiobotocore<3.0.0,>=2.5.4 (from s3fs)\n", - " Downloading aiobotocore-2.12.3-py3-none-any.whl (76 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.5/76.5 kB\u001b[0m \u001b[31m3.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting fsspec==2024.3.1 (from s3fs)\n", - " Downloading fsspec-2024.3.1-py3-none-any.whl (171 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m172.0/172.0 kB\u001b[0m \u001b[31m7.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: aiohttp!=4.0.0a0,!=4.0.0a1 in /usr/local/lib/python3.10/dist-packages (from s3fs) (3.9.5)\n", - "Collecting botocore<1.34.70,>=1.34.41 (from aiobotocore<3.0.0,>=2.5.4->s3fs)\n", - " Downloading botocore-1.34.69-py3-none-any.whl (12.0 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.0/12.0 MB\u001b[0m \u001b[31m26.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: wrapt<2.0.0,>=1.10.10 in /usr/local/lib/python3.10/dist-packages (from aiobotocore<3.0.0,>=2.5.4->s3fs) (1.14.1)\n", - "Collecting aioitertools<1.0.0,>=0.5.1 (from aiobotocore<3.0.0,>=2.5.4->s3fs)\n", - " Downloading aioitertools-0.11.0-py3-none-any.whl (23 kB)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->s3fs) (1.3.1)\n", - "Requirement already satisfied: attrs>=17.3.0 in 
/usr/local/lib/python3.10/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->s3fs) (23.2.0)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->s3fs) (1.4.1)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->s3fs) (6.0.5)\n", - "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->s3fs) (1.9.4)\n", - "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->s3fs) (4.0.3)\n", - "Collecting jmespath<2.0.0,>=0.7.1 (from botocore<1.34.70,>=1.34.41->aiobotocore<3.0.0,>=2.5.4->s3fs)\n", - " Downloading jmespath-1.0.1-py3-none-any.whl (20 kB)\n", - "Requirement already satisfied: python-dateutil<3.0.0,>=2.1 in /usr/local/lib/python3.10/dist-packages (from botocore<1.34.70,>=1.34.41->aiobotocore<3.0.0,>=2.5.4->s3fs) (2.8.2)\n", - "Requirement already satisfied: urllib3!=2.2.0,<3,>=1.25.4 in /usr/local/lib/python3.10/dist-packages (from botocore<1.34.70,>=1.34.41->aiobotocore<3.0.0,>=2.5.4->s3fs) (2.0.7)\n", - "Requirement already satisfied: idna>=2.0 in /usr/local/lib/python3.10/dist-packages (from yarl<2.0,>=1.0->aiohttp!=4.0.0a0,!=4.0.0a1->s3fs) (3.7)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil<3.0.0,>=2.1->botocore<1.34.70,>=1.34.41->aiobotocore<3.0.0,>=2.5.4->s3fs) (1.16.0)\n", - "Installing collected packages: jmespath, fsspec, aioitertools, botocore, aiobotocore, s3fs\n", - " Attempting uninstall: fsspec\n", - " Found existing installation: fsspec 2023.6.0\n", - " Uninstalling fsspec-2023.6.0:\n", - " Successfully uninstalled fsspec-2023.6.0\n", - "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", - "torch 2.2.1+cu121 requires nvidia-cublas-cu12==12.1.3.1; platform_system == \"Linux\" and platform_machine == \"x86_64\", which is not installed.\n", - "torch 2.2.1+cu121 requires nvidia-cuda-cupti-cu12==12.1.105; platform_system == \"Linux\" and platform_machine == \"x86_64\", which is not installed.\n", - "torch 2.2.1+cu121 requires nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == \"Linux\" and platform_machine == \"x86_64\", which is not installed.\n", - "torch 2.2.1+cu121 requires nvidia-cuda-runtime-cu12==12.1.105; platform_system == \"Linux\" and platform_machine == \"x86_64\", which is not installed.\n", - "torch 2.2.1+cu121 requires nvidia-cudnn-cu12==8.9.2.26; platform_system == \"Linux\" and platform_machine == \"x86_64\", which is not installed.\n", - "torch 2.2.1+cu121 requires nvidia-cufft-cu12==11.0.2.54; platform_system == \"Linux\" and platform_machine == \"x86_64\", which is not installed.\n", - "torch 2.2.1+cu121 requires nvidia-curand-cu12==10.3.2.106; platform_system == \"Linux\" and platform_machine == \"x86_64\", which is not installed.\n", - "torch 2.2.1+cu121 requires nvidia-cusolver-cu12==11.4.5.107; platform_system == \"Linux\" and platform_machine == \"x86_64\", which is not installed.\n", - "torch 2.2.1+cu121 requires nvidia-cusparse-cu12==12.1.0.106; platform_system == \"Linux\" and platform_machine == \"x86_64\", which is not installed.\n", - "torch 2.2.1+cu121 requires nvidia-nccl-cu12==2.19.3; platform_system == \"Linux\" and platform_machine == \"x86_64\", which is not installed.\n", - "torch 2.2.1+cu121 requires nvidia-nvtx-cu12==12.1.105; platform_system == \"Linux\" and platform_machine == \"x86_64\", which is not installed.\n", - "gcsfs 2023.6.0 requires fsspec==2023.6.0, but you have fsspec 2024.3.1 which is incompatible.\u001b[0m\u001b[31m\n", - "\u001b[0mSuccessfully installed aiobotocore-2.12.3 aioitertools-0.11.0 botocore-1.34.69 fsspec-2024.3.1 
jmespath-1.0.1 s3fs-2024.3.1\n", - "Collecting pyarrow==16.0.0\n", - " Downloading pyarrow-16.0.0-cp310-cp310-manylinux_2_28_x86_64.whl (40.8 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.8/40.8 MB\u001b[0m \u001b[31m13.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: numpy>=1.16.6 in /usr/local/lib/python3.10/dist-packages (from pyarrow==16.0.0) (1.25.2)\n", - "Installing collected packages: pyarrow\n", - " Attempting uninstall: pyarrow\n", - " Found existing installation: pyarrow 14.0.2\n", - " Uninstalling pyarrow-14.0.2:\n", - " Successfully uninstalled pyarrow-14.0.2\n", - "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", - "ibis-framework 8.0.0 requires pyarrow<16,>=2, but you have pyarrow 16.0.0 which is incompatible.\u001b[0m\u001b[31m\n", - "\u001b[0mSuccessfully installed pyarrow-16.0.0\n", - "Collecting zarr\n", - " Downloading zarr-2.17.2-py3-none-any.whl (208 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m208.5/208.5 kB\u001b[0m \u001b[31m5.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: xarray[complete] in /usr/local/lib/python3.10/dist-packages (2023.7.0)\n", - "Collecting asciitree (from zarr)\n", - " Downloading asciitree-0.3.3.tar.gz (4.0 kB)\n", - " Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: numpy>=1.23 in /usr/local/lib/python3.10/dist-packages (from zarr) (1.25.2)\n", - "Collecting numcodecs>=0.10.0 (from zarr)\n", - " Downloading numcodecs-0.12.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.7 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.7/7.7 MB\u001b[0m \u001b[31m27.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting fasteners (from zarr)\n", - " Downloading fasteners-0.19-py3-none-any.whl (18 kB)\n", - "Requirement already satisfied: pandas>=1.4 in /usr/local/lib/python3.10/dist-packages (from xarray[complete]) (2.0.3)\n", - "Requirement already satisfied: packaging>=21.3 in /usr/local/lib/python3.10/dist-packages (from xarray[complete]) (24.0)\n", - "Collecting netCDF4 (from xarray[complete])\n", - " Downloading netCDF4-1.6.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.5 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m57.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: h5netcdf in /usr/local/lib/python3.10/dist-packages (from xarray[complete]) (1.3.0)\n", - "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from xarray[complete]) (1.11.4)\n", - "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from xarray[complete]) (2024.3.1)\n", - "Collecting cftime (from xarray[complete])\n", - " Downloading cftime-1.6.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m71.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: pooch in /usr/local/lib/python3.10/dist-packages (from xarray[complete]) (1.8.1)\n", - "Collecting bottleneck 
(from xarray[complete])\n", - " Downloading Bottleneck-1.3.8-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (354 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m354.1/354.1 kB\u001b[0m \u001b[31m39.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting numbagg (from xarray[complete])\n", - " Downloading numbagg-0.8.1-py3-none-any.whl (48 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m48.4/48.4 kB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting flox (from xarray[complete])\n", - " Downloading flox-0.9.6-py3-none-any.whl (62 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.9/62.9 kB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: dask[complete] in /usr/local/lib/python3.10/dist-packages (from xarray[complete]) (2023.8.1)\n", - "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from xarray[complete]) (3.7.1)\n", - "Requirement already satisfied: seaborn in /usr/local/lib/python3.10/dist-packages (from xarray[complete]) (0.13.1)\n", - "Collecting nc-time-axis (from xarray[complete])\n", - " Downloading nc_time_axis-1.4.1-py3-none-any.whl (17 kB)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.4->xarray[complete]) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.4->xarray[complete]) (2023.4)\n", - "Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.4->xarray[complete]) (2024.1)\n", - "Requirement already satisfied: click>=8.0 in /usr/local/lib/python3.10/dist-packages (from dask[complete]->xarray[complete]) (8.1.7)\n", - 
"Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.10/dist-packages (from dask[complete]->xarray[complete]) (2.2.1)\n", - "Requirement already satisfied: partd>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from dask[complete]->xarray[complete]) (1.4.1)\n", - "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.10/dist-packages (from dask[complete]->xarray[complete]) (6.0.1)\n", - "Requirement already satisfied: toolz>=0.10.0 in /usr/local/lib/python3.10/dist-packages (from dask[complete]->xarray[complete]) (0.12.1)\n", - "Requirement already satisfied: importlib-metadata>=4.13.0 in /usr/local/lib/python3.10/dist-packages (from dask[complete]->xarray[complete]) (7.1.0)\n", - "Requirement already satisfied: pyarrow>=7.0 in /usr/local/lib/python3.10/dist-packages (from dask[complete]->xarray[complete]) (16.0.0)\n", - "Collecting lz4>=4.3.2 (from dask[complete]->xarray[complete])\n", - " Downloading lz4-4.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m73.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting numpy-groupies>=0.9.19 (from flox->xarray[complete])\n", - " Downloading numpy_groupies-0.11.1-py3-none-any.whl (40 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.5/40.5 kB\u001b[0m \u001b[31m4.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: h5py in /usr/local/lib/python3.10/dist-packages (from h5netcdf->xarray[complete]) (3.9.0)\n", - "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->xarray[complete]) (1.2.1)\n", - "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->xarray[complete]) (0.12.1)\n", - "Requirement already satisfied: fonttools>=4.22.0 in 
/usr/local/lib/python3.10/dist-packages (from matplotlib->xarray[complete]) (4.51.0)\n", - "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->xarray[complete]) (1.4.5)\n", - "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->xarray[complete]) (9.4.0)\n", - "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->xarray[complete]) (3.1.2)\n", - "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from netCDF4->xarray[complete]) (2024.2.2)\n", - "Requirement already satisfied: numba in /usr/local/lib/python3.10/dist-packages (from numbagg->xarray[complete]) (0.58.1)\n", - "Requirement already satisfied: platformdirs>=2.5.0 in /usr/local/lib/python3.10/dist-packages (from pooch->xarray[complete]) (4.2.1)\n", - "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from pooch->xarray[complete]) (2.31.0)\n", - "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.10/dist-packages (from importlib-metadata>=4.13.0->dask[complete]->xarray[complete]) (3.18.1)\n", - "Requirement already satisfied: locket in /usr/local/lib/python3.10/dist-packages (from partd>=1.2.0->dask[complete]->xarray[complete]) (1.0.0)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas>=1.4->xarray[complete]) (1.16.0)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->pooch->xarray[complete]) (3.3.2)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->pooch->xarray[complete]) (3.7)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->pooch->xarray[complete]) (2.0.7)\n", 
- "Requirement already satisfied: distributed==2023.8.1 in /usr/local/lib/python3.10/dist-packages (from dask[complete]->xarray[complete]) (2023.8.1)\n", - "Requirement already satisfied: bokeh>=2.4.2 in /usr/local/lib/python3.10/dist-packages (from dask[complete]->xarray[complete]) (3.3.4)\n", - "Requirement already satisfied: jinja2>=2.10.3 in /usr/local/lib/python3.10/dist-packages (from dask[complete]->xarray[complete]) (3.1.3)\n", - "Requirement already satisfied: msgpack>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from distributed==2023.8.1->dask[complete]->xarray[complete]) (1.0.8)\n", - "Requirement already satisfied: psutil>=5.7.2 in /usr/local/lib/python3.10/dist-packages (from distributed==2023.8.1->dask[complete]->xarray[complete]) (5.9.5)\n", - "Requirement already satisfied: sortedcontainers>=2.0.5 in /usr/local/lib/python3.10/dist-packages (from distributed==2023.8.1->dask[complete]->xarray[complete]) (2.4.0)\n", - "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.10/dist-packages (from distributed==2023.8.1->dask[complete]->xarray[complete]) (3.0.0)\n", - "Requirement already satisfied: tornado>=6.0.4 in /usr/local/lib/python3.10/dist-packages (from distributed==2023.8.1->dask[complete]->xarray[complete]) (6.3.3)\n", - "Requirement already satisfied: zict>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from distributed==2023.8.1->dask[complete]->xarray[complete]) (3.0.0)\n", - "Requirement already satisfied: llvmlite<0.42,>=0.41.0dev0 in /usr/local/lib/python3.10/dist-packages (from numba->numbagg->xarray[complete]) (0.41.1)\n", - "Requirement already satisfied: xyzservices>=2021.09.1 in /usr/local/lib/python3.10/dist-packages (from bokeh>=2.4.2->dask[complete]->xarray[complete]) (2024.4.0)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2>=2.10.3->dask[complete]->xarray[complete]) (2.1.5)\n", - "Building wheels for collected packages: asciitree\n", - " 
Building wheel for asciitree (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for asciitree: filename=asciitree-0.3.3-py3-none-any.whl size=5034 sha256=f4c9dc76bbe57736c6774f532134e8c8d677c7b73bc64d29866fa1c9bfd16312\n", - " Stored in directory: /root/.cache/pip/wheels/7f/4e/be/1171b40f43b918087657ec57cf3b81fa1a2e027d8755baa184\n", - "Successfully built asciitree\n", - "Installing collected packages: asciitree, numpy-groupies, numcodecs, lz4, fasteners, cftime, bottleneck, zarr, numbagg, netCDF4, nc-time-axis, flox\n", - "Successfully installed asciitree-0.3.3 bottleneck-1.3.8 cftime-1.6.3 fasteners-0.19 flox-0.9.6 lz4-4.3.3 nc-time-axis-1.4.1 netCDF4-1.6.5 numbagg-0.8.1 numcodecs-0.12.1 numpy-groupies-0.11.1 zarr-2.17.2\n", - "Collecting pandas==2.2.2\n", - " Downloading pandas-2.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.0 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.0/13.0 MB\u001b[0m \u001b[31m22.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: numpy>=1.22.4 in /usr/local/lib/python3.10/dist-packages (from pandas==2.2.2) (1.25.2)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas==2.2.2) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas==2.2.2) (2023.4)\n", - "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from pandas==2.2.2) (2024.1)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas==2.2.2) (1.16.0)\n", - "Installing collected packages: pandas\n", - " Attempting uninstall: pandas\n", - " Found existing installation: pandas 2.0.3\n", - " Uninstalling pandas-2.0.3:\n", - " Successfully uninstalled pandas-2.0.3\n", - "\u001b[31mERROR: pip's dependency resolver does not currently take 
into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", - "google-colab 1.0.0 requires pandas==2.0.3, but you have pandas 2.2.2 which is incompatible.\n", - "ibis-framework 8.0.0 requires pyarrow<16,>=2, but you have pyarrow 16.0.0 which is incompatible.\u001b[0m\u001b[31m\n", - "\u001b[0mSuccessfully installed pandas-2.2.2\n" - ] - } - ], - "source": [ - "# only run once, then restart session and comment the next 3 lines\n", - "!pip install s3fs -U\n", - "!pip install pyarrow==16.0.0 -U\n", - "!pip install zarr xarray[complete]\n", - "!pip install pandas==2.2.2 -U" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "id": "zeETrs8zQdza", - "outputId": "5af2116e-0fd5-46a4-fc2c-d0a26d8fe24c", - "colab": { - "base_uri": "https://localhost:8080/" - } - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Downloading parquet_queries.py\n" - ] - } - ], - "source": [ - "import requests\n", - "import os\n", - "if not os.path.exists('parquet_queries.py'):\n", - " print('Downloading parquet_queries.py')\n", - " url = 'https://raw.githubusercontent.com/aodn/aodn_cloud_optimised/main/notebooks/parquet_queries.py'\n", - " response = requests.get(url)\n", - " with open('parquet_queries.py', 'w') as f:\n", - " f.write(response.text)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "id": "XzHxvlw8Qdzc" - }, - "outputs": [], - "source": [ - "from parquet_queries import create_time_filter, create_bbox_filter, query_unique_value, plot_spatial_extent, get_spatial_extent, get_temporal_extent, get_schema_metadata\n", - "import pyarrow.parquet as pq\n", - "import pyarrow.dataset as pds\n", - "import pyarrow as pa\n", - "import os\n", - "import pandas as pd\n", - "import pyarrow.compute as pc" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "AycdabTJQdze" - }, - "source": [ - "## Location of the parquet dataset" - ] - }, 
- { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "id": "F5rE3sRuQdzf" - }, - "outputs": [], - "source": [ - "BUCKET_OPTIMISED_DEFAULT=\"imos-data-lab-optimised\"\n", - "dname = f\"s3://{BUCKET_OPTIMISED_DEFAULT}/parquet/loz_test/{dataset_name}.parquet/\"\n", - "parquet_ds = pq.ParquetDataset(dname,partitioning='hive')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "9IWdsh9NQdzh" - }, - "source": [ - "# Understanding the Dataset" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "DQWJBByXQdzh" - }, - "source": [ - "## Get partition keys\n", - "Partitioning in Parquet involves organising data files based on the values of one or more columns, known as partition keys. When data is written to Parquet files with partitioning enabled, the files are physically stored in a directory structure that reflects the partition keys. This directory structure makes it easier to retrieve and process specific subsets of data based on the partition keys." - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "id": "oBVjvC1tQdzi", - "outputId": "9896acb9-168b-4038-babc-ea4048e1d7fc", - "colab": { - "base_uri": "https://localhost:8080/" - } - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "timestamp: int32\n", - "PLATFORM_NUMBER: int32\n", - "polygon: string\n" - ] - } - ], - "source": [ - "dataset = pds.dataset(dname, format=\"parquet\", partitioning=\"hive\")\n", - "\n", - "partition_keys = dataset.partitioning.schema\n", - "print(partition_keys)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "nsP2odmAQdzk" - }, - "source": [ - "## List unique partition values" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "id": "HIZF5O3cQdzl", - "outputId": "abb16db6-2a9c-49d9-daf2-c00e1afb38ea", - "colab": { - "base_uri": "https://localhost:8080/" - } - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - 
"['2900887', '2901090']\n", - "CPU times: user 386 ms, sys: 6.69 ms, total: 392 ms\n", - "Wall time: 393 ms\n" - ] - } - ], - "source": [ - "%%time\n", - "unique_partition_value = query_unique_value(parquet_ds, 'PLATFORM_NUMBER')\n", - "print(list(unique_partition_value)[0:2]) # showing a subset only" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "B6mnQtRWQdzm" - }, - "source": [ - "## Visualise Spatial Extent of the dataset\n", - "In this section, we're plotting the polygons where data exists. This helps then with creating a bounding box where there is data" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "id": "owI5Rx0kQdzn", - "outputId": "d310299b-a056-4b71-f60f-05bfd25e00bb", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 318 - } - }, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": [ - "
" - ], - "image/png": "\n" - }, - "metadata": {} - } - ], - "source": [ - "plot_spatial_extent(parquet_ds)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "XqoUU0rSQdzn" - }, - "source": [ - "## Get Temporal Extent of the dataset" - ] + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "Toi10WECQdzJ" + }, + "source": [ + "## Access ARGO Core data in Parquet\n", + "\n", + "A jupyter notebook to show how to access and plot ARGO Core data available as a [Parquet](https://parquet.apache.org) dataset on S3" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "TJf1YgjtQdzS" + }, + "outputs": [], + "source": [ + "dataset_name = \"argo_core\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "u-FcvQ0UQdzW" + }, + "source": [ + "## Install/Update packages and Load common functions" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "YB7J7Y8FQdzY", + "outputId": "cd691404-a147-4c3d-96f1-55e8d2e66427" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "j8hIUrEiQdzo" - }, - "source": [ - "Similary to the spatial extent, we're retrieving the minimum and maximum timestamp partition values of the dataset. This is not necessarely accurately representative of the TIME values, as the timestamp partition can be yearly/monthly... 
but is here to give an idea" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting s3fs\n", + " Downloading s3fs-2024.3.1-py3-none-any.whl (29 kB)\n", + "Collecting aiobotocore<3.0.0,>=2.5.4 (from s3fs)\n", + " Downloading aiobotocore-2.12.3-py3-none-any.whl (76 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.5/76.5 kB\u001b[0m \u001b[31m3.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting fsspec==2024.3.1 (from s3fs)\n", + " Downloading fsspec-2024.3.1-py3-none-any.whl (171 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m172.0/172.0 kB\u001b[0m \u001b[31m7.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: aiohttp!=4.0.0a0,!=4.0.0a1 in /usr/local/lib/python3.10/dist-packages (from s3fs) (3.9.5)\n", + "Collecting botocore<1.34.70,>=1.34.41 (from aiobotocore<3.0.0,>=2.5.4->s3fs)\n", + " Downloading botocore-1.34.69-py3-none-any.whl (12.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.0/12.0 MB\u001b[0m \u001b[31m26.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: wrapt<2.0.0,>=1.10.10 in /usr/local/lib/python3.10/dist-packages (from aiobotocore<3.0.0,>=2.5.4->s3fs) (1.14.1)\n", + "Collecting aioitertools<1.0.0,>=0.5.1 (from aiobotocore<3.0.0,>=2.5.4->s3fs)\n", + " Downloading aioitertools-0.11.0-py3-none-any.whl (23 kB)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->s3fs) (1.3.1)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->s3fs) (23.2.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->s3fs) (1.4.1)\n", + "Requirement already satisfied: 
multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->s3fs) (6.0.5)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->s3fs) (1.9.4)\n", + "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->s3fs) (4.0.3)\n", + "Collecting jmespath<2.0.0,>=0.7.1 (from botocore<1.34.70,>=1.34.41->aiobotocore<3.0.0,>=2.5.4->s3fs)\n", + " Downloading jmespath-1.0.1-py3-none-any.whl (20 kB)\n", + "Requirement already satisfied: python-dateutil<3.0.0,>=2.1 in /usr/local/lib/python3.10/dist-packages (from botocore<1.34.70,>=1.34.41->aiobotocore<3.0.0,>=2.5.4->s3fs) (2.8.2)\n", + "Requirement already satisfied: urllib3!=2.2.0,<3,>=1.25.4 in /usr/local/lib/python3.10/dist-packages (from botocore<1.34.70,>=1.34.41->aiobotocore<3.0.0,>=2.5.4->s3fs) (2.0.7)\n", + "Requirement already satisfied: idna>=2.0 in /usr/local/lib/python3.10/dist-packages (from yarl<2.0,>=1.0->aiohttp!=4.0.0a0,!=4.0.0a1->s3fs) (3.7)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil<3.0.0,>=2.1->botocore<1.34.70,>=1.34.41->aiobotocore<3.0.0,>=2.5.4->s3fs) (1.16.0)\n", + "Installing collected packages: jmespath, fsspec, aioitertools, botocore, aiobotocore, s3fs\n", + " Attempting uninstall: fsspec\n", + " Found existing installation: fsspec 2023.6.0\n", + " Uninstalling fsspec-2023.6.0:\n", + " Successfully uninstalled fsspec-2023.6.0\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "torch 2.2.1+cu121 requires nvidia-cublas-cu12==12.1.3.1; platform_system == \"Linux\" and platform_machine == \"x86_64\", which is not installed.\n", + "torch 2.2.1+cu121 requires nvidia-cuda-cupti-cu12==12.1.105; platform_system == \"Linux\" and platform_machine == \"x86_64\", which is not installed.\n", + "torch 2.2.1+cu121 requires nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == \"Linux\" and platform_machine == \"x86_64\", which is not installed.\n", + "torch 2.2.1+cu121 requires nvidia-cuda-runtime-cu12==12.1.105; platform_system == \"Linux\" and platform_machine == \"x86_64\", which is not installed.\n", + "torch 2.2.1+cu121 requires nvidia-cudnn-cu12==8.9.2.26; platform_system == \"Linux\" and platform_machine == \"x86_64\", which is not installed.\n", + "torch 2.2.1+cu121 requires nvidia-cufft-cu12==11.0.2.54; platform_system == \"Linux\" and platform_machine == \"x86_64\", which is not installed.\n", + "torch 2.2.1+cu121 requires nvidia-curand-cu12==10.3.2.106; platform_system == \"Linux\" and platform_machine == \"x86_64\", which is not installed.\n", + "torch 2.2.1+cu121 requires nvidia-cusolver-cu12==11.4.5.107; platform_system == \"Linux\" and platform_machine == \"x86_64\", which is not installed.\n", + "torch 2.2.1+cu121 requires nvidia-cusparse-cu12==12.1.0.106; platform_system == \"Linux\" and platform_machine == \"x86_64\", which is not installed.\n", + "torch 2.2.1+cu121 requires nvidia-nccl-cu12==2.19.3; platform_system == \"Linux\" and platform_machine == \"x86_64\", which is not installed.\n", + "torch 2.2.1+cu121 requires nvidia-nvtx-cu12==12.1.105; platform_system == \"Linux\" and platform_machine == \"x86_64\", which is not installed.\n", + "gcsfs 2023.6.0 requires fsspec==2023.6.0, but you have fsspec 2024.3.1 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed aiobotocore-2.12.3 aioitertools-0.11.0 botocore-1.34.69 fsspec-2024.3.1 
jmespath-1.0.1 s3fs-2024.3.1\n", + "Collecting pyarrow==16.0.0\n", + " Downloading pyarrow-16.0.0-cp310-cp310-manylinux_2_28_x86_64.whl (40.8 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.8/40.8 MB\u001b[0m \u001b[31m13.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: numpy>=1.16.6 in /usr/local/lib/python3.10/dist-packages (from pyarrow==16.0.0) (1.25.2)\n", + "Installing collected packages: pyarrow\n", + " Attempting uninstall: pyarrow\n", + " Found existing installation: pyarrow 14.0.2\n", + " Uninstalling pyarrow-14.0.2:\n", + " Successfully uninstalled pyarrow-14.0.2\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "ibis-framework 8.0.0 requires pyarrow<16,>=2, but you have pyarrow 16.0.0 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed pyarrow-16.0.0\n", + "Collecting zarr\n", + " Downloading zarr-2.17.2-py3-none-any.whl (208 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m208.5/208.5 kB\u001b[0m \u001b[31m5.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: xarray[complete] in /usr/local/lib/python3.10/dist-packages (2023.7.0)\n", + "Collecting asciitree (from zarr)\n", + " Downloading asciitree-0.3.3.tar.gz (4.0 kB)\n", + " Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: numpy>=1.23 in /usr/local/lib/python3.10/dist-packages (from zarr) (1.25.2)\n", + "Collecting numcodecs>=0.10.0 (from zarr)\n", + " Downloading numcodecs-0.12.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.7 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.7/7.7 MB\u001b[0m \u001b[31m27.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting fasteners (from zarr)\n", + " Downloading fasteners-0.19-py3-none-any.whl (18 kB)\n", + "Requirement already satisfied: pandas>=1.4 in /usr/local/lib/python3.10/dist-packages (from xarray[complete]) (2.0.3)\n", + "Requirement already satisfied: packaging>=21.3 in /usr/local/lib/python3.10/dist-packages (from xarray[complete]) (24.0)\n", + "Collecting netCDF4 (from xarray[complete])\n", + " Downloading netCDF4-1.6.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m57.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: h5netcdf in /usr/local/lib/python3.10/dist-packages (from xarray[complete]) (1.3.0)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from xarray[complete]) (1.11.4)\n", + "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from xarray[complete]) (2024.3.1)\n", + "Collecting cftime (from xarray[complete])\n", + " Downloading cftime-1.6.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m71.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: pooch in /usr/local/lib/python3.10/dist-packages (from xarray[complete]) (1.8.1)\n", + "Collecting bottleneck 
(from xarray[complete])\n", + " Downloading Bottleneck-1.3.8-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (354 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m354.1/354.1 kB\u001b[0m \u001b[31m39.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting numbagg (from xarray[complete])\n", + " Downloading numbagg-0.8.1-py3-none-any.whl (48 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m48.4/48.4 kB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting flox (from xarray[complete])\n", + " Downloading flox-0.9.6-py3-none-any.whl (62 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.9/62.9 kB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: dask[complete] in /usr/local/lib/python3.10/dist-packages (from xarray[complete]) (2023.8.1)\n", + "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from xarray[complete]) (3.7.1)\n", + "Requirement already satisfied: seaborn in /usr/local/lib/python3.10/dist-packages (from xarray[complete]) (0.13.1)\n", + "Collecting nc-time-axis (from xarray[complete])\n", + " Downloading nc_time_axis-1.4.1-py3-none-any.whl (17 kB)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.4->xarray[complete]) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.4->xarray[complete]) (2023.4)\n", + "Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.4->xarray[complete]) (2024.1)\n", + "Requirement already satisfied: click>=8.0 in /usr/local/lib/python3.10/dist-packages (from dask[complete]->xarray[complete]) (8.1.7)\n", + 
"Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.10/dist-packages (from dask[complete]->xarray[complete]) (2.2.1)\n", + "Requirement already satisfied: partd>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from dask[complete]->xarray[complete]) (1.4.1)\n", + "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.10/dist-packages (from dask[complete]->xarray[complete]) (6.0.1)\n", + "Requirement already satisfied: toolz>=0.10.0 in /usr/local/lib/python3.10/dist-packages (from dask[complete]->xarray[complete]) (0.12.1)\n", + "Requirement already satisfied: importlib-metadata>=4.13.0 in /usr/local/lib/python3.10/dist-packages (from dask[complete]->xarray[complete]) (7.1.0)\n", + "Requirement already satisfied: pyarrow>=7.0 in /usr/local/lib/python3.10/dist-packages (from dask[complete]->xarray[complete]) (16.0.0)\n", + "Collecting lz4>=4.3.2 (from dask[complete]->xarray[complete])\n", + " Downloading lz4-4.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m73.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting numpy-groupies>=0.9.19 (from flox->xarray[complete])\n", + " Downloading numpy_groupies-0.11.1-py3-none-any.whl (40 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.5/40.5 kB\u001b[0m \u001b[31m4.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: h5py in /usr/local/lib/python3.10/dist-packages (from h5netcdf->xarray[complete]) (3.9.0)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->xarray[complete]) (1.2.1)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->xarray[complete]) (0.12.1)\n", + "Requirement already satisfied: fonttools>=4.22.0 in 
/usr/local/lib/python3.10/dist-packages (from matplotlib->xarray[complete]) (4.51.0)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->xarray[complete]) (1.4.5)\n", + "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->xarray[complete]) (9.4.0)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->xarray[complete]) (3.1.2)\n", + "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from netCDF4->xarray[complete]) (2024.2.2)\n", + "Requirement already satisfied: numba in /usr/local/lib/python3.10/dist-packages (from numbagg->xarray[complete]) (0.58.1)\n", + "Requirement already satisfied: platformdirs>=2.5.0 in /usr/local/lib/python3.10/dist-packages (from pooch->xarray[complete]) (4.2.1)\n", + "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from pooch->xarray[complete]) (2.31.0)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.10/dist-packages (from importlib-metadata>=4.13.0->dask[complete]->xarray[complete]) (3.18.1)\n", + "Requirement already satisfied: locket in /usr/local/lib/python3.10/dist-packages (from partd>=1.2.0->dask[complete]->xarray[complete]) (1.0.0)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas>=1.4->xarray[complete]) (1.16.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->pooch->xarray[complete]) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->pooch->xarray[complete]) (3.7)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->pooch->xarray[complete]) (2.0.7)\n", 
+ "Requirement already satisfied: distributed==2023.8.1 in /usr/local/lib/python3.10/dist-packages (from dask[complete]->xarray[complete]) (2023.8.1)\n", + "Requirement already satisfied: bokeh>=2.4.2 in /usr/local/lib/python3.10/dist-packages (from dask[complete]->xarray[complete]) (3.3.4)\n", + "Requirement already satisfied: jinja2>=2.10.3 in /usr/local/lib/python3.10/dist-packages (from dask[complete]->xarray[complete]) (3.1.3)\n", + "Requirement already satisfied: msgpack>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from distributed==2023.8.1->dask[complete]->xarray[complete]) (1.0.8)\n", + "Requirement already satisfied: psutil>=5.7.2 in /usr/local/lib/python3.10/dist-packages (from distributed==2023.8.1->dask[complete]->xarray[complete]) (5.9.5)\n", + "Requirement already satisfied: sortedcontainers>=2.0.5 in /usr/local/lib/python3.10/dist-packages (from distributed==2023.8.1->dask[complete]->xarray[complete]) (2.4.0)\n", + "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.10/dist-packages (from distributed==2023.8.1->dask[complete]->xarray[complete]) (3.0.0)\n", + "Requirement already satisfied: tornado>=6.0.4 in /usr/local/lib/python3.10/dist-packages (from distributed==2023.8.1->dask[complete]->xarray[complete]) (6.3.3)\n", + "Requirement already satisfied: zict>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from distributed==2023.8.1->dask[complete]->xarray[complete]) (3.0.0)\n", + "Requirement already satisfied: llvmlite<0.42,>=0.41.0dev0 in /usr/local/lib/python3.10/dist-packages (from numba->numbagg->xarray[complete]) (0.41.1)\n", + "Requirement already satisfied: xyzservices>=2021.09.1 in /usr/local/lib/python3.10/dist-packages (from bokeh>=2.4.2->dask[complete]->xarray[complete]) (2024.4.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2>=2.10.3->dask[complete]->xarray[complete]) (2.1.5)\n", + "Building wheels for collected packages: asciitree\n", + " 
Building wheel for asciitree (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for asciitree: filename=asciitree-0.3.3-py3-none-any.whl size=5034 sha256=f4c9dc76bbe57736c6774f532134e8c8d677c7b73bc64d29866fa1c9bfd16312\n", + " Stored in directory: /root/.cache/pip/wheels/7f/4e/be/1171b40f43b918087657ec57cf3b81fa1a2e027d8755baa184\n", + "Successfully built asciitree\n", + "Installing collected packages: asciitree, numpy-groupies, numcodecs, lz4, fasteners, cftime, bottleneck, zarr, numbagg, netCDF4, nc-time-axis, flox\n", + "Successfully installed asciitree-0.3.3 bottleneck-1.3.8 cftime-1.6.3 fasteners-0.19 flox-0.9.6 lz4-4.3.3 nc-time-axis-1.4.1 netCDF4-1.6.5 numbagg-0.8.1 numcodecs-0.12.1 numpy-groupies-0.11.1 zarr-2.17.2\n", + "Collecting pandas==2.2.2\n", + " Downloading pandas-2.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.0/13.0 MB\u001b[0m \u001b[31m22.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: numpy>=1.22.4 in /usr/local/lib/python3.10/dist-packages (from pandas==2.2.2) (1.25.2)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas==2.2.2) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas==2.2.2) (2023.4)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from pandas==2.2.2) (2024.1)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas==2.2.2) (1.16.0)\n", + "Installing collected packages: pandas\n", + " Attempting uninstall: pandas\n", + " Found existing installation: pandas 2.0.3\n", + " Uninstalling pandas-2.0.3:\n", + " Successfully uninstalled pandas-2.0.3\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take 
into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "google-colab 1.0.0 requires pandas==2.0.3, but you have pandas 2.2.2 which is incompatible.\n", + "ibis-framework 8.0.0 requires pyarrow<16,>=2, but you have pyarrow 16.0.0 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed pandas-2.2.2\n" + ] + } + ], + "source": [ + "# only run once, then restart session and comment the next 3 lines\n", + "!pip install s3fs -U\n", + "!pip install pyarrow==16.0.0 -U\n", + "!pip install zarr xarray[complete]\n", + "!pip install pandas==2.2.2 -U" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "zeETrs8zQdza", + "outputId": "5af2116e-0fd5-46a4-fc2c-d0a26d8fe24c" + }, + "outputs": [], + "source": [ + "import requests\n", + "import os\n", + "if not os.path.exists('parquet_queries.py'):\n", + " print('Downloading parquet_queries.py')\n", + " url = 'https://raw.githubusercontent.com/aodn/aodn_cloud_optimised/main/notebooks/parquet_queries.py'\n", + " response = requests.get(url)\n", + " with open('parquet_queries.py', 'w') as f:\n", + " f.write(response.text)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "XzHxvlw8Qdzc" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "id": "dLLrZLPRQdzo", - "outputId": "4eaebf32-a48c-4ef3-f221-d3ed71f2e269", - "colab": { - "base_uri": "https://localhost:8080/" - } - }, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "(datetime.datetime(1998, 1, 1, 0, 0), datetime.datetime(2026, 1, 1, 0, 0))" - ] - }, - "metadata": {}, - "execution_count": 10 - } - ], - "source": [ - "get_temporal_extent(parquet_ds)" - ] + "name": "stderr", + "output_type": "stream", + "text": [ + 
"/home/lbesnard/miniforge3/envs/AodnCloudOptimised/lib/python3.10/site-packages/fuzzywuzzy/fuzz.py:11: UserWarning: Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning\n", + " warnings.warn('Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning')\n" + ] + } + ], + "source": [ + "from parquet_queries import create_time_filter, create_bbox_filter, query_unique_value, plot_spatial_extent, get_spatial_extent, get_temporal_extent, get_schema_metadata\n", + "import pyarrow.parquet as pq\n", + "import pyarrow.dataset as pds\n", + "import pyarrow as pa\n", + "import os\n", + "import pandas as pd\n", + "import pyarrow.compute as pc" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AycdabTJQdze" + }, + "source": [ + "## Location of the parquet dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "F5rE3sRuQdzf" + }, + "outputs": [], + "source": [ + "BUCKET_OPTIMISED_DEFAULT=\"imos-data-lab-optimised\"\n", + "dname = f\"s3://{BUCKET_OPTIMISED_DEFAULT}/cloud_optimised/cluster_testing/{dataset_name}.parquet/\"\n", + "parquet_ds = pq.ParquetDataset(dname,partitioning='hive')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9IWdsh9NQdzh" + }, + "source": [ + "# Understanding the Dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DQWJBByXQdzh" + }, + "source": [ + "## Get partition keys\n", + "Partitioning in Parquet involves organising data files based on the values of one or more columns, known as partition keys. When data is written to Parquet files with partitioning enabled, the files are physically stored in a directory structure that reflects the partition keys. This directory structure makes it easier to retrieve and process specific subsets of data based on the partition keys." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "oBVjvC1tQdzi", + "outputId": "9896acb9-168b-4038-babc-ea4048e1d7fc" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "MkK9kuQdQdzp" - }, - "source": [ - "## Read Metadata\n", - "\n", - "For all parquet dataset, we create a sidecar file in the root of the dataset named **_common_matadata**. This contains the variable attributes." - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "timestamp: int32\n", + "PLATFORM_NUMBER: int32\n", + "polygon: string\n" + ] + } + ], + "source": [ + "dataset = pds.dataset(dname, format=\"parquet\", partitioning=\"hive\")\n", + "\n", + "partition_keys = dataset.partitioning.schema\n", + "print(partition_keys)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nsP2odmAQdzk" + }, + "source": [ + "## List unique partition values" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "HIZF5O3cQdzl", + "outputId": "abb16db6-2a9c-49d9-daf2-c00e1afb38ea" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "id": "MYOh_AiLQdzp", - "outputId": "d9578e1a-7a97-4c68-82b3-92a56e6cd765", - "colab": { - "base_uri": "https://localhost:8080/" - } - }, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "{'DATA_TYPE': {'type': 'string',\n", - " 'long_name': 'Data type',\n", - " 'conventions': 'Argo reference table 1'},\n", - " 'FORMAT_VERSION': {'type': 'string', 'long_name': 'File format version'},\n", - " 'HANDBOOK_VERSION': {'type': 'string', 'long_name': 'Data handbook version'},\n", - " 'REFERENCE_DATE_TIME': {'type': 'string',\n", - " 'long_name': 'Date of reference for Julian days',\n", - " 'conventions': 'YYYYMMDDHHMISS'},\n", - " 'DATE_CREATION': {'type': 'string',\n", - " 'long_name': 'Date of file 
creation',\n", - " 'conventions': 'YYYYMMDDHHMISS'},\n", - " 'DATE_UPDATE': {'type': 'string',\n", - " 'long_name': 'Date of update of this file',\n", - " 'conventions': 'YYYYMMDDHHMISS'},\n", - " 'PLATFORM_NUMBER': {'type': 'string',\n", - " 'long_name': 'Float unique identifier',\n", - " 'conventions': 'WMO float identifier : A9IIIII'},\n", - " 'PROJECT_NAME': {'type': 'string', 'long_name': 'Name of the project'},\n", - " 'PI_NAME': {'type': 'string',\n", - " 'long_name': 'Name of the principal investigator'},\n", - " 'STATION_PARAMETERS': {'type': 'string',\n", - " 'long_name': 'List of available parameters for the station',\n", - " 'conventions': 'Argo reference table 3'},\n", - " 'CYCLE_NUMBER': {'type': 'double',\n", - " 'long_name': 'Float cycle number',\n", - " 'conventions': '0...N, 0 : launch cycle (if exists), 1 : first complete cycle'},\n", - " 'DIRECTION': {'type': 'string',\n", - " 'long_name': 'Direction of the station profiles',\n", - " 'conventions': 'A: ascending profiles, D: descending profiles'},\n", - " 'DATA_CENTRE': {'type': 'string'},\n", - " 'DC_REFERENCE': {'type': 'string',\n", - " 'long_name': 'Station unique identifier in data centre',\n", - " 'conventions': 'Data centre convention'},\n", - " 'DATA_STATE_INDICATOR': {'type': 'string',\n", - " 'long_name': 'Degree of processing the data have passed through',\n", - " 'conventions': 'Argo reference table 6'},\n", - " 'DATA_MODE': {'type': 'string',\n", - " 'long_name': 'Delayed mode or real time data',\n", - " 'conventions': 'R : real time; D : delayed mode; A : real time with adjustment'},\n", - " 'PLATFORM_TYPE': {'type': 'string',\n", - " 'long_name': 'Type of float',\n", - " 'conventions': 'Argo reference table 23'},\n", - " 'FLOAT_SERIAL_NO': {'type': 'string',\n", - " 'long_name': 'Serial number of the float'},\n", - " 'FIRMWARE_VERSION': {'type': 'string',\n", - " 'long_name': 'Instrument firmware version'},\n", - " 'WMO_INST_TYPE': {'type': 'string',\n", - " 'long_name': 'Coded 
instrument type',\n", - " 'conventions': 'Argo reference table 8'},\n", - " 'JULD': {'type': 'timestamp[ns]',\n", - " 'long_name': 'Julian day (UTC) of the station relative to REFERENCE_DATE_TIME',\n", - " 'standard_name': 'time',\n", - " 'conventions': 'Relative julian days with decimal part (as parts of day)',\n", - " 'resolution': 0.0,\n", - " 'axis': 'T'},\n", - " 'JULD_QC': {'type': 'string',\n", - " 'long_name': 'Quality on date and time',\n", - " 'conventions': 'Argo reference table 2'},\n", - " 'JULD_LOCATION': {'type': 'timestamp[ns]',\n", - " 'long_name': 'Julian day (UTC) of the location relative to REFERENCE_DATE_TIME',\n", - " 'conventions': 'Relative julian days with decimal part (as parts of day)',\n", - " 'resolution': 0.0},\n", - " 'LATITUDE': {'type': 'double',\n", - " 'long_name': 'Latitude of the station, best estimate',\n", - " 'standard_name': 'latitude',\n", - " 'units': 'degree_north',\n", - " 'valid_min': -90.0,\n", - " 'valid_max': 90.0,\n", - " 'axis': 'Y'},\n", - " 'LONGITUDE': {'type': 'double',\n", - " 'long_name': 'Longitude of the station, best estimate',\n", - " 'standard_name': 'longitude',\n", - " 'units': 'degree_east',\n", - " 'valid_min': -180.0,\n", - " 'valid_max': 180.0,\n", - " 'axis': 'X'},\n", - " 'POSITION_QC': {'type': 'string',\n", - " 'long_name': 'Quality on position (latitude and longitude)',\n", - " 'conventions': 'Argo reference table 2'},\n", - " 'POSITIONING_SYSTEM': {'type': 'string', 'long_name': 'Positioning system'},\n", - " 'PROFILE_PRES_QC': {'type': 'string',\n", - " 'long_name': 'Global quality flag of PRES profile',\n", - " 'conventions': 'Argo reference table 2a'},\n", - " 'PROFILE_TEMP_QC': {'type': 'string',\n", - " 'long_name': 'Global quality flag of TEMP profile',\n", - " 'conventions': 'Argo reference table 2a'},\n", - " 'PROFILE_PSAL_QC': {'type': 'string',\n", - " 'long_name': 'Global quality flag of PSAL profile',\n", - " 'conventions': 'Argo reference table 2a'},\n", - " 
'VERTICAL_SAMPLING_SCHEME': {'type': 'string',\n", - " 'long_name': 'Vertical sampling scheme',\n", - " 'conventions': 'Argo reference table 16'},\n", - " 'CONFIG_MISSION_NUMBER': {'type': 'double',\n", - " 'long_name': 'Unique number denoting the missions performed by the float',\n", - " 'conventions': '1...N, 1 : first complete mission'},\n", - " 'PRES': {'type': 'float',\n", - " 'long_name': 'Sea water pressure, equals 0 at sea-level',\n", - " 'standard_name': 'sea_water_pressure',\n", - " 'units': 'decibar',\n", - " 'valid_min': 0.0,\n", - " 'valid_max': 12000.0,\n", - " 'C_format': '%7.1f',\n", - " 'FORTRAN_format': 'F7.1',\n", - " 'resolution': 1.0,\n", - " 'axis': 'Z'},\n", - " 'PRES_QC': {'type': 'string',\n", - " 'long_name': 'quality flag',\n", - " 'conventions': 'Argo reference table 2'},\n", - " 'PRES_ADJUSTED': {'type': 'float',\n", - " 'long_name': 'Sea water pressure, equals 0 at sea-level',\n", - " 'standard_name': 'sea_water_pressure',\n", - " 'units': 'decibar',\n", - " 'valid_min': 0.0,\n", - " 'valid_max': 12000.0,\n", - " 'C_format': '%7.1f',\n", - " 'FORTRAN_format': 'F7.1',\n", - " 'resolution': 1.0,\n", - " 'axis': 'Z'},\n", - " 'PRES_ADJUSTED_QC': {'type': 'string',\n", - " 'long_name': 'quality flag',\n", - " 'conventions': 'Argo reference table 2'},\n", - " 'PRES_ADJUSTED_ERROR': {'type': 'float',\n", - " 'long_name': 'Contains the error on the adjusted values as determined by the delayed mode QC process',\n", - " 'units': 'decibar',\n", - " 'C_format': '%7.1f',\n", - " 'FORTRAN_format': 'F7.1',\n", - " 'resolution': 1.0},\n", - " 'TEMP': {'type': 'float',\n", - " 'long_name': 'Sea temperature in-situ ITS-90 scale',\n", - " 'standard_name': 'sea_water_temperature',\n", - " 'units': 'degree_Celsius',\n", - " 'valid_min': -2.5,\n", - " 'valid_max': 40.0,\n", - " 'C_format': '%9.3f',\n", - " 'FORTRAN_format': 'F9.3',\n", - " 'resolution': 0.0010000000474974513},\n", - " 'TEMP_QC': {'type': 'string',\n", - " 'long_name': 'quality flag',\n", - 
" 'conventions': 'Argo reference table 2'},\n", - " 'TEMP_ADJUSTED': {'type': 'float',\n", - " 'long_name': 'Sea temperature in-situ ITS-90 scale',\n", - " 'standard_name': 'sea_water_temperature',\n", - " 'units': 'degree_Celsius',\n", - " 'valid_min': -2.5,\n", - " 'valid_max': 40.0,\n", - " 'C_format': '%9.3f',\n", - " 'FORTRAN_format': 'F9.3',\n", - " 'resolution': 0.0010000000474974513},\n", - " 'TEMP_ADJUSTED_QC': {'type': 'string',\n", - " 'long_name': 'quality flag',\n", - " 'conventions': 'Argo reference table 2'},\n", - " 'TEMP_ADJUSTED_ERROR': {'type': 'float',\n", - " 'long_name': 'Contains the error on the adjusted values as determined by the delayed mode QC process',\n", - " 'units': 'degree_Celsius',\n", - " 'C_format': '%9.3f',\n", - " 'FORTRAN_format': 'F9.3',\n", - " 'resolution': 0.0010000000474974513},\n", - " 'PSAL': {'type': 'float',\n", - " 'long_name': 'Practical salinity',\n", - " 'standard_name': 'sea_water_salinity',\n", - " 'units': 'psu',\n", - " 'valid_min': 2.0,\n", - " 'valid_max': 41.0,\n", - " 'C_format': '%9.3f',\n", - " 'FORTRAN_format': 'F9.3',\n", - " 'resolution': 0.0010000000474974513},\n", - " 'PSAL_QC': {'type': 'string',\n", - " 'long_name': 'quality flag',\n", - " 'conventions': 'Argo reference table 2'},\n", - " 'PSAL_ADJUSTED': {'type': 'float',\n", - " 'long_name': 'Practical salinity',\n", - " 'standard_name': 'sea_water_salinity',\n", - " 'units': 'psu',\n", - " 'valid_min': 2.0,\n", - " 'valid_max': 41.0,\n", - " 'C_format': '%9.3f',\n", - " 'FORTRAN_format': 'F9.3',\n", - " 'resolution': 0.0010000000474974513},\n", - " 'PSAL_ADJUSTED_QC': {'type': 'string',\n", - " 'long_name': 'quality flag',\n", - " 'conventions': 'Argo reference table 2'},\n", - " 'PSAL_ADJUSTED_ERROR': {'type': 'float',\n", - " 'long_name': 'Contains the error on the adjusted values as determined by the delayed mode QC process',\n", - " 'units': 'psu',\n", - " 'C_format': '%9.3f',\n", - " 'FORTRAN_format': 'F9.3',\n", - " 'resolution': 
0.0010000000474974513},\n", - " 'PARAMETER': {'type': 'string',\n", - " 'long_name': 'List of parameters with calibration information',\n", - " 'conventions': 'Argo reference table 3'},\n", - " 'SCIENTIFIC_CALIB_EQUATION': {'type': 'string',\n", - " 'long_name': 'Calibration equation for this parameter'},\n", - " 'SCIENTIFIC_CALIB_COEFFICIENT': {'type': 'string',\n", - " 'long_name': 'Calibration coefficients for this equation'},\n", - " 'SCIENTIFIC_CALIB_COMMENT': {'type': 'string',\n", - " 'long_name': 'Comment applying to this parameter calibration'},\n", - " 'SCIENTIFIC_CALIB_DATE': {'type': 'string',\n", - " 'long_name': 'Date of calibration',\n", - " 'conventions': 'YYYYMMDDHHMISS'},\n", - " 'HISTORY_INSTITUTION': {'type': 'string',\n", - " 'long_name': 'Institution which performed action',\n", - " 'conventions': 'Argo reference table 4'},\n", - " 'HISTORY_STEP': {'type': 'string',\n", - " 'long_name': 'Step in data processing',\n", - " 'conventions': 'Argo reference table 12'},\n", - " 'HISTORY_SOFTWARE': {'type': 'string',\n", - " 'long_name': 'Name of software which performed action',\n", - " 'conventions': 'Institution dependent'},\n", - " 'HISTORY_SOFTWARE_RELEASE': {'type': 'string',\n", - " 'long_name': 'Version/release of software which performed action',\n", - " 'conventions': 'Institution dependent'},\n", - " 'HISTORY_REFERENCE': {'type': 'string',\n", - " 'long_name': 'Reference of database',\n", - " 'conventions': 'Institution dependent'},\n", - " 'HISTORY_DATE': {'type': 'string',\n", - " 'long_name': 'Date the history record was created',\n", - " 'conventions': 'YYYYMMDDHHMISS'},\n", - " 'HISTORY_ACTION': {'type': 'string',\n", - " 'long_name': 'Action performed on data',\n", - " 'conventions': 'Argo reference table 7'},\n", - " 'HISTORY_PARAMETER': {'type': 'string',\n", - " 'long_name': 'Station parameter action is performed on',\n", - " 'conventions': 'Argo reference table 3'},\n", - " 'HISTORY_START_PRES': {'type': 'float',\n", - " 
'long_name': 'Start pressure action applied on',\n", - " 'units': 'decibar'},\n", - " 'HISTORY_STOP_PRES': {'type': 'float',\n", - " 'long_name': 'Stop pressure action applied on',\n", - " 'units': 'decibar'},\n", - " 'HISTORY_PREVIOUS_VALUE': {'type': 'float',\n", - " 'long_name': 'Parameter/Flag previous value before action'},\n", - " 'HISTORY_QCTEST': {'type': 'string',\n", - " 'long_name': 'Documentation of tests performed, tests failed (in hex form)',\n", - " 'conventions': 'Write tests performed when ACTION=QCP$; tests failed when ACTION=QCF$'},\n", - " 'filename': {'type': 'string'},\n", - " 'timestamp': {'type': 'int64'},\n", - " 'polygon': {'type': 'string'},\n", - " 'dataset_metadata': {'metadata_uuid': '4402cb50-e20a-44ee-93e6-4728259250d2',\n", - " 'title': 'Argo Core'}}" - ] - }, - "metadata": {}, - "execution_count": 11 - } - ], - "source": [ - "# parquet_meta = pa.parquet.read_schema(os.path.join(dname + '_common_metadata')) # parquet metadata\n", - "get_schema_metadata(dname) # schema metadata" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "['4902486', '3901894']\n", + "CPU times: user 1.12 s, sys: 20.1 ms, total: 1.14 s\n", + "Wall time: 1.13 s\n" + ] + } + ], + "source": [ + "%%time\n", + "unique_partition_value = query_unique_value(parquet_ds, 'PLATFORM_NUMBER')\n", + "print(list(unique_partition_value)[0:2]) # showing a subset only" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "B6mnQtRWQdzm" + }, + "source": [ + "## Visualise Spatial Extent of the dataset\n", + "In this section, we're plotting the polygons where data exists. 
This helps then with creating a bounding box where there is data" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 318 }, + "id": "owI5Rx0kQdzn", + "outputId": "d310299b-a056-4b71-f60f-05bfd25e00bb" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "EzmbSF4oQdzq" - }, - "source": [ - "# Data Query and Plot" + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjwAAAEtCAYAAAAMbbo5AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAntklEQVR4nO3dfXRU9Z3H8U8CZCBCEsjTJCUEgi1U5FnNBpXFkhIoq6VSj/WpxCIsNOCBUAvZykP0uMlKRXc9KO1WEs5RC3rqU7sWG5DQ4xLUpUaKSo5BHqIkwdImA1QTIL/9o2XqSO4kE2bmzty8X+fcQ+be39z5/mbunfnym/udX4wxxggAAMDBYu0OAAAAINRIeAAAgOOR8AAAAMcj4QEAAI5HwgMAAByPhAcAADgeCQ8AAHC8vnYHEA4dHR06fvy4Bg0apJiYGLvDAQAA3WCM0alTp5SZmanY2Esbo+kVCc/x48eVlZVldxgAAKAHGhoaNHTo0EvaR69IeAYNGiTpb09YQkKCzdEAAIDu8Hg8ysrK8n6OX4pekfBc+BorISGBhAcAgCgTjMtRuGgZAAA4HgkPAABwPBIeAADgeCQ8AADA8Uh4AACA45HwAAAAxyPhAQAAjkfCAwAAHI+EBwAAOF6v+KVlW6xbZ72tujqwfU2bFvhjBEugj9GTmIL1GMGMNRz9DjWrmAI9/iTrY9BKTx4D4RWs19TffiLxvAhUMM+jaGLn506IMMIDAAAcj4QHAAA4HgkPAABwPBIeAADgeCQ8AADA8ajSChV/V/AfOdL5+uHDQxAIEAWszomWlnBG0btYPedWrF4Lf/txQiUTx6ZjMMIDAAAcj4QHAAA4HgkPAABwPBIeAADgeCQ8AADA8WKMMcbuIELN4/EoMTFRra2tSkhICO7OI3GelUDnyJGia34vRC+r46OysvP14aiESUoK/D5OrtCxej7srCLtSaVUJPYjmkTIe3wwP78Z4QEAAI5HwgMAABzP9oRn+PDhiomJuWgpKiqSJE2bNu2ibYsWLbI5agAAEE1s/6Xlt99+W+fPn/fePnDggL75zW/qlltu8a5bsGCBHnjgAe/t+Pj4sMYIAACim+0JT2pqqs/t8vJyjRw5Uv/8z//sXRcfHy+3293tfba1tamtrc172+PxXHqgAAAgatme8HxRe3u7nn76aRUXFysmJsa7/plnntHTTz8tt9utG2+8UatXr/Y7ylNWVqbS0tJwhGxd3RToPDVS8Co//D22VYVCoFUQgfbP6dVb4ajWi5CqiW49dqDnhZOrnoLJX0VZsKqPnHCcSZHZD9gqohKel156SS0tLSosLPSuu/3225Wdna3MzEzt379fK1euVF1dnV544QXL/ZSUlKi4uNh72+PxKCsrK5ShAwCACBZRCc9TTz2lWbNmKTMz07tu4cKF3r/Hjh2rjIwMTZ8+XYcOHdLIkSM73Y/L5ZLL5Qp5vAAAIDrYXqV1wdGjR7Vjxw7dc889ftvl5uZKkurr68MRFgAAcICISXgqKiqUlpam2bNn+21XW1srScrIyAh
DVAAAwAki4iutjo4OVVRUaN68eerb9x8hHTp0SM8++6y+9a1vKTk5Wfv379fy5cs1depUjRs3zsaIAQBANImIhGfHjh06duyYfvCDH/isj4uL044dO/TYY4/pzJkzysrK0ty5c3X//ffbFCkAAIhGEZHwzJgxQ53NYZqVlaXdu3fbEFEArEofrSZDlKxLcMMx2V2gE4vaOQmqk/kr6w/1TwEEs4TeCWXmgZ6PkjRhQufrA30+Aj3n/Z2/Ti63dnLfEDYRcw0PAABAqJDwAAAAxyPhAQAAjkfCAwAAHI+EBwAAOF5EVGk5Uk8qq4I12V1PKhqogrg0gT5//qpt/v7jmhexqvSxqgjsrRN4+quu6oxVv/2dw1bPbaAVcExwiXDwdzz1omOTER4AAOB4JDwAAMDxSHgAAIDjkfAAAADHI+EBAACOR5XWpQq0EkYKvIIr0MqPQOdc8rcvK3ZewR+OqrVoYlVlZFXthdBx+rGGroWjSjaYnwlOqc7sBkZ4AACA45HwAAAAxyPhAQAAjkfCAwAAHI+EBwAAOB5VWqHi78p3q+qZQKurgnl1faAVPYHGSvWKL39zaTl9rqtABDovFhDJAp23ygrvET3CCA8AAHA8Eh4AAOB4tic869atU0xMjM8yevRo7/bPP/9cRUVFSk5O1sCBAzV37lw1NzfbGDEAAIg2tic8kjRmzBg1NjZ6lzfeeMO7bfny5fr1r3+t559/Xrt379bx48d188032xgtAACINhFx0XLfvn3ldrsvWt/a2qqnnnpKzz77rL7xjW9IkioqKvT1r39de/fu1T/90z91ur+2tja1tbV5b3s8ntAEDgAAokJEJDwffvihMjMz1b9/f+Xl5amsrEzDhg3Tvn37dPbsWeXn53vbjh49WsOGDVNNTY1lwlNWVqbS0tJwhR880XSFvVWslZWB78vJc2MFOueNZD3XGpUZvgKdky5Q/ipnnHzMont6cm5zDtvK9q+0cnNzVVlZqe3bt+vJJ5/U4cOHdf311+vUqVNqampSXFyckr5Umpqenq6mpibLfZaUlKi1tdW7NDQ0hLgXAAAgktk+wjNr1izv3+PGjVNubq6ys7P13HPPacCAAT3ap8vlksvlClaIAAAgytk+wvNlSUlJ+trXvqb6+nq53W61t7er5UvDfc3NzZ1e8wMAANCZiEt4Tp8+rUOHDikjI0OTJ09Wv379tHPnTu/2uro6HTt2THl5eTZGCQAAoontX2n96Ec/0o033qjs7GwdP35ca9euVZ8+fXTbbbcpMTFR8+fPV3FxsYYMGaKEhAQtXbpUeXl5lhcsAwAAfJntCc/HH3+s2267TSdPnlRqaqquu+467d27V6mpqZKkRx99VLGxsZo7d67a2tpUUFCgJ554wuaoYcmq2sBf5YITWFVsWFWt+avKYP6of/D3PAU6n5sVq2qvnlTbWAl0DiWqusIv0KorKq6iju0Jz9atW/1u79+/vzZu3KiNGzeGKSIAAOA0EXcNDwAAQLCR8AAAAMcj4QEAAI5HwgMAAByPhAcAADie7VVaQFQJZvm5FcpauydYz1NtbXD2Y/djoHvCcQ4jIjHCAwAAHI+EBwAAOB4JDwAAcDwSHgAA4HgkPAAAwPGo0kJ4+Jts0apqItQTKPrbPxMGRgZ/E6laTfoZia+RVT+sJg9F6Fid25zDvqzOI7ver4OAER4AAOB4JDwAAMDxSHgAAIDjkfAAAADHI+EBAACOR5UWwsNfBYTVHDZWAq0SCLTiSqJiI9ysqpgKC63vE6zjwEowjw+rirJAK1t6UlloVQkWBVU1QDAxwgMAAByPhAcAADie7QlPWVmZrr76ag0aNEhpaWmaM2eO6urqfNpMmzZNMTExPsuiRYtsihgAAEQb2xOe3bt3q6ioSHv37lVVVZXOnj2rGTNm6MyZMz7tFixYoMbGRu/y8MMP2xQxAACINrZftLx9+3af25WVlUpLS9O+ffs0depU7/r4+Hi53e5u7bO
trU1tbW3e2x6PJzjBAgCAqGR7wvNlra2tkqQhQ4b4rH/mmWf09NNPy+1268Ybb9Tq1asVHx/f6T7KyspUWloa8lgRJFYVL4FWb1lVqdTWBrYfhE6g1Vg9qSQKVvVRMOdaCxZ/lWZWx3mgMYW6+k0KXuVYJL5GThGsysIIElEJT0dHh5YtW6Zrr71WV155pXf97bffruzsbGVmZmr//v1auXKl6urq9MILL3S6n5KSEhUXF3tvezweZWVlhTx+AAAQmSIq4SkqKtKBAwf0xhtv+KxfuHCh9++xY8cqIyND06dP16FDhzRy5MiL9uNyueRyuUIeLwAAiA62X7R8wZIlS/Sb3/xGu3bt0tChQ/22zc3NlSTV19eHIzQAABDlbB/hMcZo6dKlevHFF1VdXa0RI0Z0eZ/av39XnZGREeLoAACAE9ie8BQVFenZZ5/Vyy+/rEGDBqmpqUmSlJiYqAEDBujQoUN69tln9a1vfUvJycnav3+/li9frqlTp2rcuHE2Rw8AAKJBjDHG2BpATEyn6ysqKlRYWKiGhgbdeeedOnDggM6cOaOsrCx95zvf0f3336+EhIRuPYbH41FiYqJaW1u7fZ9us6o2oDIodKwqfawwL1bkmDCh8/U9qfSJRFbvB1aVQVaVMFaCOb9XoBVzwayC7MncaZ3xV8nJeX9pwlFR2Q3B/Py2fYSnq3wrKytLu3fvDlM0AADAiSLmomUAAIBQIeEBAACOR8IDAAAcj4QHAAA4nu0XLQMBo/oCwRTM+bqsqrSsqonsrOYM1hx2wXzsQKv1eC9AABjhAQAAjkfCAwAAHI+EBwAAOB4JDwAAcDwSHgAA4HgkPAAAwPEoSweCwWqiPavJIYM5CWQ0seq3VTl3T1jtK0STG17SY1iVgNt5DDj5+EOvxggPAABwPBIeAADgeCQ8AADA8Uh4AACA45HwAAAAx6NKCwgGq2osq8kQ/VXzWN3HqsIpmqpqrGIN5iSa/irgOhPM6i2rfQU6KWZvFehrBwSAER4AAOB4UZPwbNy4UcOHD1f//v2Vm5urt956y+6QAABAlIiKhGfbtm0qLi7W2rVr9Yc//EHjx49XQUGBTpw4YXdoAAAgCkRFwrNhwwYtWLBAd999t6644gpt2rRJ8fHx2rx5s92hAQCAKBDxCU97e7v27dun/Px877rY2Fjl5+erpqam0/u0tbXJ4/H4LAAAoPeK+CqtP/3pTzp//rzS09N91qenp+vgwYOd3qesrEylpaXhCA/omZ5UBgWrAsgJ1V7+WPUjHJVSVo8RzCo0J4umY9Bq/jx/oql/VpWn4ZiTLkQifoSnJ0pKStTa2updGhoa7A4JAADYKOJHeFJSUtSnTx81Nzf7rG9ubpbb7e70Pi6XSy6XKxzhAQCAKBDxIzxxcXGaPHmydu7c6V3X0dGhnTt3Ki8vz8bIAABAtIj4ER5JKi4u1rx583TVVVfpmmuu0WOPPaYzZ87o7rvvtjs0AAAQBaIi4bn11lv16aefas2aNWpqatKECRO0ffv2iy5kBgAA6ExUJDyStGTJEi1ZssTuMAD7BKs6Ytq0ztdTSXTprJ5bp1fG9UZWVUz+RNM5ZnXMWr0PRUH1VsRfwwMAAHCpSHgAAIDjkfAAAADHI+EBAACOR8IDAAAcL2qqtICI5sCKBqBXsZoby6oay6oizx+q9WzFCA8AAHA8Eh4AAOB4JDwAAMDxSHgAAIDjkfAAAADHI+EBAACOR1k6EAxWZaXV1eGMwpdV6btVaSwundXrTdlx+AWrzDwcPyFRWdn5ejuPG6vnKYp/UoMRHgAA4HgkPAAAwPFIeAAAgOOR8AAAAMcj4QEAAI5HlRYQ7ayqJiKx8sMpqICLfFZVRpFYOWkVU21tiALpBgdOiMwIDwAAcDzbEp4jR45o/vz5GjFihAYMGKCRI0dq7dq1am9v92kTExNz0bJ37167wgYAAFHItq+0Dh48qI6ODv3sZz/T5ZdfrgMHDmjBggU6c+a
MfvrTn/q03bFjh8aMGeO9nZycHO5wAQBAFLMt4Zk5c6ZmzpzpvZ2Tk6O6ujo9+eSTFyU8ycnJcrvd4Q4RAAA4RERdw9Pa2qohQ4ZctP6mm25SWlqarrvuOr3yyitd7qetrU0ej8dnAQAAvVfEVGnV19fr8ccf9xndGThwoB555BFde+21io2N1a9+9SvNmTNHL730km666SbLfZWVlam0tDQcYQPhQzXWpbGqOrGaQ6kn++I5jxzBfL0D2U8UVCv1VkEf4Vm1alWnFxp/cTl48KDPfT755BPNnDlTt9xyixYsWOBdn5KSouLiYuXm5urqq69WeXm57rzzTq1fv95vDCUlJWptbfUuDQ0Nwe4mAACIIkEf4VmxYoUKCwv9tsnJyfH+ffz4cd1www2aMmWKfv7zn3e5/9zcXFVVVflt43K55HK5uhUvAABwvqAnPKmpqUpNTe1W208++UQ33HCDJk+erIqKCsXGdj3gVFtbq4yMjEsNEwAA9CK2XcPzySefaNq0acrOztZPf/pTffrpp95tFyqytmzZori4OE2cOFGS9MILL2jz5s36xS9+YUvMAAAgOtmW8FRVVam+vl719fUaOnSozzZjjPfvBx98UEePHlXfvn01evRobdu2Td/97nfDHS4AAIhiMeaL2YVDeTweJSYmqrW1VQkJCcHdudWV+nbOgYLIMWFC5+v9zedDNRYQ+ZKSOl9vNYeXP5FY9deT964QCObnd0T9Dg8AAEAokPAAAADHI+EBAACOR8IDAAAcj4QHAAA4XsTMpQXg76yqIKjGAiKH1flIhW7EYoQHAAA4HgkPAABwPBIeAADgeCQ8AADA8Uh4AACA41GlBQSD1bw6VnOtWc2XJVnPqwMA6DFGeAAAgOOR8AAAAMcj4QEAAI5HwgMAAByPhAcAADgeCQ8AAHA8ytIBO1hNECoxSSgAhAAjPAAAwPFsTXiGDx+umJgYn6W8vNynzf79+3X99derf//+ysrK0sMPP2xTtAAAIFrZ/pXWAw88oAULFnhvDxo0yPu3x+PRjBkzlJ+fr02bNumPf/yjfvCDHygpKUkLFy60I1wAABCFbE94Bg0aJLfb3em2Z555Ru3t7dq8ebPi4uI0ZswY1dbWasOGDSQ8AACg22y/hqe8vFzJycmaOHGi1q9fr3Pnznm31dTUaOrUqYqLi/OuKygoUF1dnf7yl79Y7rOtrU0ej8dnAQAAvZetIzz33nuvJk2apCFDhmjPnj0qKSlRY2OjNmzYIElqamrSiBEjfO6Tnp7u3TZ48OBO91tWVqbS0tLQBg98kVVlVWVlOKMAAFgI+gjPqlWrLroQ+cvLwYMHJUnFxcWaNm2axo0bp0WLFumRRx7R448/rra2tkuKoaSkRK2trd6loaEhGF0DAABRKugjPCtWrFBhYaHfNjk5OZ2uz83N1blz53TkyBGNGjVKbrdbzc3NPm0u3La67keSXC6XXC5XYIEDAADHCnrCk5qaqtTU1B7dt7a2VrGxsUpLS5Mk5eXl6Sc/+YnOnj2rfv36SZKqqqo0atQoy6+zAAAAvsy2i5Zramr02GOP6d1339VHH32kZ555RsuXL9edd97pTWZuv/12xcXFaf78+Xrvvfe0bds2/ed//qeKi4vtChsAAEQh2y5adrlc2rp1q9atW6e2tjaNGDFCy5cv90lmEhMT9bvf/U5FRUWaPHmyUlJStGbNGkrSAQBAQGKMMcbuIELN4/EoMTFRra2tSkhICO7Op03rfH1tbXAfBwCAcElK6ny91TW669aFJIxgfn7b/js8AAAAoUbCAwAAHI+EBwAAOB4JDwAAcDwSHgAA4Hi2z5YO9EpWFRD+WM3XBSB0Aj1XnXKeWvWjujqcUQQVIzwAAMDxSHgAAIDjkfAAAADHI+EBAACOR8IDAAAcjyotIBBWFRvDhwe2H6s52Pyxqo44cqTz9U6pFrESrNfCitXz6k+wHhuRI9BzNRxVTOE4563Or568d0UIRng
AAIDjkfAAAADHI+EBAACOR8IDAAAcj4QHAAA4HlVavU2oK1vCIdDqmWD2zapCYd264D1GoKweO4rnvOmWUL8WPdmPnccBeg+rY7+2NniPYfW+GcXHOCM8AADA8Uh4AACA49mW8FRXVysmJqbT5e2335YkHTlypNPte/futStsAAAQhWy7hmfKlClqbGz0Wbd69Wrt3LlTV111lc/6HTt2aMyYMd7bycnJYYkRAAA4g20JT1xcnNxut/f22bNn9fLLL2vp0qWKiYnxaZucnOzTFgAAIBARcw3PK6+8opMnT+ruu+++aNtNN92ktLQ0XXfddXrllVe63FdbW5s8Ho/PAgAAeq+IKUt/6qmnVFBQoKFDh3rXDRw4UI888oiuvfZaxcbG6le/+pXmzJmjl156STfddJPlvsrKylRaWhqOsKNvIrVILKsOVKCxOr28OJpijSY8r4hU4fjcibbPtm4I+gjPqlWrLC9GvrAcPHjQ5z4ff/yxXnvtNc2fP99nfUpKioqLi5Wbm6urr75a5eXluvPOO7V+/Xq/MZSUlKi1tdW7NDQ0BLubAAAgigR9hGfFihUqLCz02yYnJ8fndkVFhZKTk/2O2lyQm5urqqoqv21cLpdcLleX+wIAAL1D0BOe1NRUpaamdru9MUYVFRX6/ve/r379+nXZvra2VhkZGZcSIgAA6GVsv4bn9ddf1+HDh3XPPfdctG3Lli2Ki4vTxIkTJUkvvPCCNm/erF/84hfhDhMAAEQx2xOep556SlOmTNHo0aM73f7ggw/q6NGj6tu3r0aPHq1t27bpu9/9bpijBAAA0SzGGGPsDiLUPB6PEhMT1draqoSEBLvDQTQIRyUYAMCvYH5+R8zv8AAAAIQKCQ8AAHA8Eh4AAOB4JDwAAMDxSHgAAIDj2V6WDkQkqq4A9GYOrFRlhAcAADgeCQ8AAHA8Eh4AAOB4JDwAAMDxSHgAAIDjUaUFAOhaT6pwoqByB70HIzwAAMDxSHgAAIDjkfAAAADHI+EBAACOR8IDAAAcjyotAADgy4EVdozwAAAAxyPhAQAAjheyhOehhx7SlClTFB8fr6SkpE7bHDt2TLNnz1Z8fLzS0tJ033336dy5cz5tqqurNWnSJLlcLl1++eWqrKwMVcgAAMChQpbwtLe365ZbbtHixYs73X7+/HnNnj1b7e3t2rNnj7Zs2aLKykqtWbPG2+bw4cOaPXu2brjhBtXW1mrZsmW655579Nprr4UqbAAA4EAxxhgTygeorKzUsmXL1NLS4rP+t7/9rf7lX/5Fx48fV3p6uiRp06ZNWrlypT799FPFxcVp5cqV+p//+R8dOHDAe7/vfe97amlp0fbt27sdg8fjUWJiolpbW5WQkBCUfgFAr8LUErBBMD+/bbuGp6amRmPHjvUmO5JUUFAgj8ej9957z9smPz/f534FBQWqqanxu++2tjZ5PB6fBQAA9F62laU3NTX5JDuSvLebmpr8tvF4PPrss880YMCATvddVlam0tLSEEQNAL0UozWIcgGN8KxatUoxMTF+l4MHD4Yq1m4rKSlRa2urd2loaLA7JAAAYKOARnhWrFihwsJCv21ycnK6tS+326233nrLZ11zc7N324V/L6z7YpuEhATL0R1Jcrlccrlc3YoDAAA4X0AJT2pqqlJTU4PywHl5eXrooYd04sQJpaWlSZKqqqqUkJCgK664wtvm1Vdf9blfVVWV8vLyghIDAADoHUJ20fKxY8dUW1urY8eO6fz586qtrVVtba1Onz4tSZoxY4auuOIK3XXXXXr33Xf12muv6f7771dRUZF3dGbRokX66KOP9OMf/1gHDx7UE088oeeee07Lly8PVdgAAMCBQlaWXlhYqC1btly0fteuXZo2bZok6ejRo1q8eLGqq6t12WWXad68eSovL1ffvv8YeKqurtby5cv1/vvva+jQoVq9enWXX6t9GWXpAABEn2B+fof8d3giAQkPAADRxxG/wwMAABAuJDw
AAMDxSHgAAIDj2fZLy+F04TIlppgAACB6XPjcDsblxr0i4Tl16pQkKSsry+ZIAABAoE6dOqXExMRL2kevqNLq6OjQ8ePHNWjQIMXExNgdTkh5PB5lZWWpoaGhV1Wk0W/67XS9sc8S/e7t/TbG6NSpU8rMzFRs7KVdhdMrRnhiY2M1dOhQu8MIq4SEhF51klxAv3uX3tjv3thniX73Nl/s96WO7FzARcsAAMDxSHgAAIDjkfA4jMvl0tq1a3vdbPH0m347XW/ss0S/6Xfw9IqLlgEAQO/GCA8AAHA8Eh4AAOB4JDwAAMDxSHgAAIDjkfAAAADHI+GJYg899JCmTJmi+Ph4JSUlddomJibmomXr1q0+baqrqzVp0iS5XC5dfvnlqqysDH3wl6A7/T527Jhmz56t+Ph4paWl6b777tO5c+d82kRbv79s+PDhF7225eXlPm3279+v66+/Xv3791dWVpYefvhhm6INno0bN2r48OHq37+/cnNz9dZbb9kdUlCtW7fuotd19OjR3u2ff/65ioqKlJycrIEDB2ru3Llqbm62MeKe+f3vf68bb7xRmZmZiomJ0UsvveSz3RijNWvWKCMjQwMGDFB+fr4+/PBDnzZ//vOfdccddyghIUFJSUmaP3++Tp8+HcZeBKarPhcWFl702s+cOdOnTbT1WZLKysp09dVXa9CgQUpLS9OcOXNUV1fn06Y7x3V33tf9IeGJYu3t7brlllu0ePFiv+0qKirU2NjoXebMmePddvjwYc2ePVs33HCDamtrtWzZMt1zzz167bXXQhx9z3XV7/Pnz2v27Nlqb2/Xnj17tGXLFlVWVmrNmjXeNtHY78488MADPq/t0qVLvds8Ho9mzJih7Oxs7du3T+vXr9e6dev085//3MaIL822bdtUXFystWvX6g9/+IPGjx+vgoICnThxwu7QgmrMmDE+r+sbb7zh3bZ8+XL9+te/1vPPP6/du3fr+PHjuvnmm22MtmfOnDmj8ePHa+PGjZ1uf/jhh/Vf//Vf2rRpk958801ddtllKigo0Oeff+5tc8cdd+i9995TVVWVfvOb3+j3v/+9Fi5cGK4uBKyrPkvSzJkzfV77X/7ylz7bo63PkrR7924VFRVp7969qqqq0tmzZzVjxgydOXPG26ar47o77+tdMoh6FRUVJjExsdNtksyLL75oed8f//jHZsyYMT7rbr31VlNQUBDECEPDqt+vvvqqiY2NNU1NTd51Tz75pElISDBtbW3GmOju9wXZ2dnm0Ucftdz+xBNPmMGDB3v7bIwxK1euNKNGjQpDdKFxzTXXmKKiIu/t8+fPm8zMTFNWVmZjVMG1du1aM378+E63tbS0mH79+pnnn3/eu+6DDz4wkkxNTU2YIgy+L79PdXR0GLfbbdavX+9d19LSYlwul/nlL39pjDHm/fffN5LM22+/7W3z29/+1sTExJhPPvkkbLH3VGfvzfPmzTPf/va3Le8T7X2+4MSJE0aS2b17tzGme8d1d97Xu8IITy9QVFSklJQUXXPNNdq8ebPMF35rsqamRvn5+T7tCwoKVFNTE+4wg6ampkZjx45Venq6d11BQYE8Ho/ee+89bxsn9Lu8vFzJycmaOHGi1q9f7zO8W1NTo6lTpyouLs67rqCgQHV1dfrLX/5iR7iXpL29Xfv27fN53WJjY5Wfnx91r1tXPvzwQ2VmZionJ0d33HGHjh07Jknat2+fzp496/McjB49WsOGDXPUc3D48GE1NTX59DMxMVG5ubneftbU1CgpKUlXXXWVt01+fr5iY2P15ptvhj3mYKmurlZaWppGjRqlxYsX6+TJk95tTulza2urJGnIkCGSundcd+d9vSu9Yrb03uyBBx7QN77xDcXHx+t3v/udfvjDH+r06dO69957JUlNTU0+B5Akpaeny+Px6LPPPtOAAQPsCPuSWPXpwjZ/baKp3/fee68mTZqkIUOGaM+ePSopKVFjY6M2bNgg6W99HDFihM9
9vvg8DB48OOwxX4o//elPOn/+fKev28GDB22KKvhyc3NVWVmpUaNGqbGxUaWlpbr++ut14MABNTU1KS4u7qJr19LT073HthNc6Etnr/UXz+G0tDSf7X379tWQIUOi9rmYOXOmbr75Zo0YMUKHDh3Sv/3bv2nWrFmqqalRnz59HNHnjo4OLVu2TNdee62uvPJKSerWcd2d9/WukPBEmFWrVuk//uM//Lb54IMPfC5i9Gf16tXevydOnKgzZ85o/fr13oQnUgS739EqkOehuLjYu27cuHGKi4vTv/7rv6qsrKzXzb/jJLNmzfL+PW7cOOXm5io7O1vPPfdcVCTi6Lnvfe973r/Hjh2rcePGaeTIkaqurtb06dNtjCx4ioqKdODAAZ/r0sKFhCfCrFixQoWFhX7b5OTk9Hj/ubm5evDBB9XW1iaXyyW3233RlfDNzc1KSEgI65trMPvtdrsvqty50Ee32+39NxL6/WWX8jzk5ubq3LlzOnLkiEaNGmXZR+kfz0M0SUlJUZ8+fTrtUzT2p7uSkpL0ta99TfX19frmN7+p9vZ2tbS0+Pxv2GnPwYW+NDc3KyMjw7u+ublZEyZM8Lb58sXq586d05///GfHPBc5OTlKSUlRfX29pk+fHvV9XrJkifdC66FDh3rXu93uLo/r7ryvd4WEJ8KkpqYqNTU1ZPuvra3V4MGDvSMAeXl5evXVV33aVFVVKS8vL2QxdCaY/c7Ly9NDDz2kEydOeId/q6qqlJCQoCuuuMLbJhL6/WWX8jzU1tYqNjbW2+e8vDz95Cc/0dmzZ9WvXz9Jf+vjqFGjou7rLEmKi4vT5MmTtXPnTm+lYUdHh3bu3KklS5bYG1wInT59WocOHdJdd92lyZMnq1+/ftq5c6fmzp0rSaqrq9OxY8dsP3aDacSIEXK73dq5c6c3wfF4PHrzzTe91Zl5eXlqaWnRvn37NHnyZEnS66+/ro6ODuXm5toVelB9/PHHOnnypDfpi9Y+G2O0dOlSvfjii6qurr7oq/buHNfdeV/vTiCIUkePHjXvvPOOKS0tNQMHDjTvvPOOeeedd8ypU6eMMca88sor5r//+7/NH//4R/Phhx+aJ554wsTHx5s1a9Z49/HRRx+Z+Ph4c99995kPPvjAbNy40fTp08ds377drm51qat+nzt3zlx55ZVmxowZpra21mzfvt2kpqaakpIS7z6isd9ftGfPHvPoo4+a2tpac+jQIfP000+b1NRU8/3vf9/bpqWlxaSnp5u77rrLHDhwwGzdutXEx8ebn/3sZzZGfmm2bt1qXC6XqaysNO+//75ZuHChSUpK8qnciHYrVqww1dXV5vDhw+Z///d/TX5+vklJSTEnTpwwxhizaNEiM2zYMPP666+b//u//zN5eXkmLy/P5qgDd+rUKe+5K8ls2LDBvPPOO+bo0aPGGGPKy8tNUlKSefnll83+/fvNt7/9bTNixAjz2Wefefcxc+ZMM3HiRPPmm2+aN954w3z1q181t912m11d6pK/Pp86dcr86Ec/MjU1Nebw4cNmx44dZtKkSearX/2q+fzzz737iLY+G2PM4sWLTWJioqmurjaNjY3e5a9//au3TVfHdXfe17tCwhPF5s2bZyRdtOzatcsY87dyxQkTJpiBAweayy67zIwfP95s2rTJnD9/3mc/u3btMhMmTDBxcXEmJyfHVFRUhL8zAeiq38YYc+TIETNr1iwzYMAAk5KSYlasWGHOnj3rs59o6/cX7du3z+Tm5prExETTv39/8/Wvf938+7//u88bozHGvPvuu+a6664zLpfLfOUrXzHl5eU2RRw8jz/+uBk2bJiJi4sz11xzjdm7d6/dIQXVrbfeajIyMkxcXJz5yle+Ym699VZTX1/v3f7ZZ5+ZH/7wh2bw4MEmPj7efOc73zGNjY02Rtwzu3bt6vQ8njdvnjHmb6Xpq1evNunp6cblcpnp06eburo6n32cPHnS3HbbbWbgwIEmISHB3H3
33d7/+EQif33+61//ambMmGFSU1NNv379THZ2tlmwYMFFyXy09dkY02mfJfm853bnuO7O+7o/MX8PBgAAwLH4HR4AAOB4JDwAAMDxSHgAAIDjkfAAAADHI+EBAACOR8IDAAAcj4QHAAA4HgkPAABwPBIeAADgeCQ8AADA8Uh4AACA4/0/dSgnTBMq6HMAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plot_spatial_extent(parquet_ds)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XqoUU0rSQdzn" + }, + "source": [ + "## Get Temporal Extent of the dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "j8hIUrEiQdzo" + }, + "source": [ + "Similarly to the spatial extent, we're retrieving the minimum and maximum timestamp partition values of the dataset. This is not necessarily accurately representative of the TIME values, as the timestamp partition can be yearly/monthly... but is here to give an idea" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "dLLrZLPRQdzo", + "outputId": "4eaebf32-a48c-4ef3-f221-d3ed71f2e269" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "v6QvVVLsQdzq" - }, - "source": [ - "## Create a TIME and BoundingBox filter" + "data": { + "text/plain": [ + "(datetime.datetime(1997, 1, 1, 11, 0), datetime.datetime(2026, 1, 1, 11, 0))" ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "get_temporal_extent(parquet_ds)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MkK9kuQdQdzp" + }, + "source": [ + "## Read Metadata\n", + "\n", + "For all parquet datasets, we create a sidecar file in the root of the dataset named **_common_metadata**. This contains the variable attributes."
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "MYOh_AiLQdzp", + "outputId": "d9578e1a-7a97-4c68-82b3-92a56e6cd765" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "id": "SQ8m5afVQdzq" - }, - "outputs": [], - "source": [ - "filter_time = create_time_filter(parquet_ds, date_start='2018-12-01', date_end='2023-01-01')\n", - "filter_geo = create_bbox_filter(parquet_ds, lat_min=-34, lat_max=-28, lon_min=151, lon_max=160)\n", - "\n", - "\n", - "filter = filter_geo & filter_time" + "data": { + "text/plain": [ + "{'DATA_TYPE': {'type': 'string',\n", + " 'long_name': 'Data type',\n", + " 'conventions': 'Argo reference table 1'},\n", + " 'FORMAT_VERSION': {'type': 'string', 'long_name': 'File format version'},\n", + " 'HANDBOOK_VERSION': {'type': 'string', 'long_name': 'Data handbook version'},\n", + " 'REFERENCE_DATE_TIME': {'type': 'string',\n", + " 'long_name': 'Date of reference for Julian days',\n", + " 'conventions': 'YYYYMMDDHHMISS'},\n", + " 'DATE_CREATION': {'type': 'string',\n", + " 'long_name': 'Date of file creation',\n", + " 'conventions': 'YYYYMMDDHHMISS'},\n", + " 'DATE_UPDATE': {'type': 'string',\n", + " 'long_name': 'Date of update of this file',\n", + " 'conventions': 'YYYYMMDDHHMISS'},\n", + " 'PLATFORM_NUMBER': {'type': 'string',\n", + " 'long_name': 'Float unique identifier',\n", + " 'conventions': 'WMO float identifier : A9IIIII'},\n", + " 'PROJECT_NAME': {'type': 'string', 'long_name': 'Name of the project'},\n", + " 'PI_NAME': {'type': 'string',\n", + " 'long_name': 'Name of the principal investigator'},\n", + " 'STATION_PARAMETERS': {'type': 'string',\n", + " 'long_name': 'List of available parameters for the station',\n", + " 'conventions': 'Argo reference table 3'},\n", + " 'CYCLE_NUMBER': {'type': 'double',\n", + " 'long_name': 'Float cycle number',\n", + " 'conventions': '0...N, 0 : launch cycle (if exists), 1 : 
first complete cycle'},\n", + " 'DIRECTION': {'type': 'string',\n", + " 'long_name': 'Direction of the station profiles',\n", + " 'conventions': 'A: ascending profiles, D: descending profiles'},\n", + " 'DATA_CENTRE': {'type': 'string',\n", + " 'long_name': 'Data centre in charge of float data processing',\n", + " 'conventions': 'Argo reference table 4'},\n", + " 'DC_REFERENCE': {'type': 'string',\n", + " 'long_name': 'Station unique identifier in data centre',\n", + " 'conventions': 'Data centre convention'},\n", + " 'DATA_STATE_INDICATOR': {'type': 'string',\n", + " 'long_name': 'Degree of processing the data have passed through',\n", + " 'conventions': 'Argo reference table 6'},\n", + " 'DATA_MODE': {'type': 'string',\n", + " 'long_name': 'Delayed mode or real time data',\n", + " 'conventions': 'R : real time; D : delayed mode; A : real time with adjustment'},\n", + " 'PLATFORM_TYPE': {'type': 'string',\n", + " 'long_name': 'Type of float',\n", + " 'conventions': 'Argo reference table 23'},\n", + " 'FLOAT_SERIAL_NO': {'type': 'string',\n", + " 'long_name': 'Serial number of the float'},\n", + " 'FIRMWARE_VERSION': {'type': 'string',\n", + " 'long_name': 'Instrument firmware version'},\n", + " 'WMO_INST_TYPE': {'type': 'string',\n", + " 'long_name': 'Coded instrument type',\n", + " 'conventions': 'Argo reference table 8'},\n", + " 'JULD': {'type': 'timestamp[ns]',\n", + " 'long_name': 'Julian day (UTC) of the station relative to REFERENCE_DATE_TIME',\n", + " 'standard_name': 'time',\n", + " 'conventions': 'Relative julian days with decimal part (as parts of day)',\n", + " 'resolution': 0.0,\n", + " 'axis': 'T'},\n", + " 'JULD_QC': {'type': 'string',\n", + " 'long_name': 'Quality on date and time',\n", + " 'conventions': 'Argo reference table 2'},\n", + " 'JULD_LOCATION': {'type': 'timestamp[ns]',\n", + " 'long_name': 'Julian day (UTC) of the location relative to REFERENCE_DATE_TIME',\n", + " 'conventions': 'Relative julian days with decimal part (as parts of 
day)',\n", + " 'resolution': 0.0},\n", + " 'LATITUDE': {'type': 'double',\n", + " 'long_name': 'Latitude of the station, best estimate',\n", + " 'standard_name': 'latitude',\n", + " 'units': 'degree_north',\n", + " 'valid_min': -90.0,\n", + " 'valid_max': 90.0,\n", + " 'axis': 'Y'},\n", + " 'LONGITUDE': {'type': 'double',\n", + " 'long_name': 'Longitude of the station, best estimate',\n", + " 'standard_name': 'longitude',\n", + " 'units': 'degree_east',\n", + " 'valid_min': -180.0,\n", + " 'valid_max': 180.0,\n", + " 'axis': 'X'},\n", + " 'POSITION_QC': {'type': 'string',\n", + " 'long_name': 'Quality on position (latitude and longitude)',\n", + " 'conventions': 'Argo reference table 2'},\n", + " 'POSITIONING_SYSTEM': {'type': 'string', 'long_name': 'Positioning system'},\n", + " 'PROFILE_PRES_QC': {'type': 'string',\n", + " 'long_name': 'Global quality flag of PRES profile',\n", + " 'conventions': 'Argo reference table 2a'},\n", + " 'PROFILE_TEMP_QC': {'type': 'string',\n", + " 'long_name': 'Global quality flag of TEMP profile',\n", + " 'conventions': 'Argo reference table 2a'},\n", + " 'PROFILE_PSAL_QC': {'type': 'string',\n", + " 'long_name': 'Global quality flag of PSAL profile',\n", + " 'conventions': 'Argo reference table 2a'},\n", + " 'VERTICAL_SAMPLING_SCHEME': {'type': 'string',\n", + " 'long_name': 'Vertical sampling scheme',\n", + " 'conventions': 'Argo reference table 16'},\n", + " 'CONFIG_MISSION_NUMBER': {'type': 'double',\n", + " 'long_name': 'Unique number denoting the missions performed by the float',\n", + " 'conventions': '1...N, 1 : first complete mission'},\n", + " 'PRES': {'type': 'float',\n", + " 'long_name': 'Sea water pressure, equals 0 at sea-level',\n", + " 'standard_name': 'sea_water_pressure',\n", + " 'units': 'decibar',\n", + " 'valid_min': 0.0,\n", + " 'valid_max': 12000.0,\n", + " 'C_format': '%7.1f',\n", + " 'FORTRAN_format': 'F7.1',\n", + " 'resolution': 1.0,\n", + " 'axis': 'Z'},\n", + " 'PRES_QC': {'type': 'string',\n", + " 
'long_name': 'quality flag',\n", + " 'conventions': 'Argo reference table 2'},\n", + " 'PRES_ADJUSTED': {'type': 'float',\n", + " 'long_name': 'Sea water pressure, equals 0 at sea-level',\n", + " 'standard_name': 'sea_water_pressure',\n", + " 'units': 'decibar',\n", + " 'valid_min': 0.0,\n", + " 'valid_max': 12000.0,\n", + " 'C_format': '%7.1f',\n", + " 'FORTRAN_format': 'F7.1',\n", + " 'resolution': 1.0,\n", + " 'axis': 'Z'},\n", + " 'PRES_ADJUSTED_QC': {'type': 'string',\n", + " 'long_name': 'quality flag',\n", + " 'conventions': 'Argo reference table 2'},\n", + " 'PRES_ADJUSTED_ERROR': {'type': 'float',\n", + " 'long_name': 'Contains the error on the adjusted values as determined by the delayed mode QC process',\n", + " 'units': 'decibar',\n", + " 'C_format': '%7.1f',\n", + " 'FORTRAN_format': 'F7.1',\n", + " 'resolution': 1.0},\n", + " 'TEMP': {'type': 'float',\n", + " 'long_name': 'Sea temperature in-situ ITS-90 scale',\n", + " 'standard_name': 'sea_water_temperature',\n", + " 'units': 'degree_Celsius',\n", + " 'valid_min': -2.5,\n", + " 'valid_max': 40.0,\n", + " 'C_format': '%9.3f',\n", + " 'FORTRAN_format': 'F9.3',\n", + " 'resolution': 0.0010000000474974513},\n", + " 'TEMP_QC': {'type': 'string',\n", + " 'long_name': 'quality flag',\n", + " 'conventions': 'Argo reference table 2'},\n", + " 'TEMP_ADJUSTED': {'type': 'float',\n", + " 'long_name': 'Sea temperature in-situ ITS-90 scale',\n", + " 'standard_name': 'sea_water_temperature',\n", + " 'units': 'degree_Celsius',\n", + " 'valid_min': -2.5,\n", + " 'valid_max': 40.0,\n", + " 'C_format': '%9.3f',\n", + " 'FORTRAN_format': 'F9.3',\n", + " 'resolution': 0.0010000000474974513},\n", + " 'TEMP_ADJUSTED_QC': {'type': 'string',\n", + " 'long_name': 'quality flag',\n", + " 'conventions': 'Argo reference table 2'},\n", + " 'TEMP_ADJUSTED_ERROR': {'type': 'float',\n", + " 'long_name': 'Contains the error on the adjusted values as determined by the delayed mode QC process',\n", + " 'units': 'degree_Celsius',\n", + 
" 'C_format': '%9.3f',\n", + " 'FORTRAN_format': 'F9.3',\n", + " 'resolution': 0.0010000000474974513},\n", + " 'PSAL': {'type': 'float',\n", + " 'long_name': 'Practical salinity',\n", + " 'standard_name': 'sea_water_salinity',\n", + " 'units': 'psu',\n", + " 'valid_min': 2.0,\n", + " 'valid_max': 41.0,\n", + " 'C_format': '%9.3f',\n", + " 'FORTRAN_format': 'F9.3',\n", + " 'resolution': 0.0010000000474974513},\n", + " 'PSAL_QC': {'type': 'string',\n", + " 'long_name': 'quality flag',\n", + " 'conventions': 'Argo reference table 2'},\n", + " 'PSAL_ADJUSTED': {'type': 'float',\n", + " 'long_name': 'Practical salinity',\n", + " 'standard_name': 'sea_water_salinity',\n", + " 'units': 'psu',\n", + " 'valid_min': 2.0,\n", + " 'valid_max': 41.0,\n", + " 'C_format': '%9.3f',\n", + " 'FORTRAN_format': 'F9.3',\n", + " 'resolution': 0.0010000000474974513},\n", + " 'PSAL_ADJUSTED_QC': {'type': 'string',\n", + " 'long_name': 'quality flag',\n", + " 'conventions': 'Argo reference table 2'},\n", + " 'PSAL_ADJUSTED_ERROR': {'type': 'float',\n", + " 'long_name': 'Contains the error on the adjusted values as determined by the delayed mode QC process',\n", + " 'units': 'psu',\n", + " 'C_format': '%9.3f',\n", + " 'FORTRAN_format': 'F9.3',\n", + " 'resolution': 0.0010000000474974513},\n", + " 'PARAMETER': {'type': 'string',\n", + " 'long_name': 'List of parameters with calibration information',\n", + " 'conventions': 'Argo reference table 3'},\n", + " 'SCIENTIFIC_CALIB_EQUATION': {'type': 'string',\n", + " 'long_name': 'Calibration equation for this parameter'},\n", + " 'SCIENTIFIC_CALIB_COEFFICIENT': {'type': 'string',\n", + " 'long_name': 'Calibration coefficients for this equation'},\n", + " 'SCIENTIFIC_CALIB_COMMENT': {'type': 'string',\n", + " 'long_name': 'Comment applying to this parameter calibration'},\n", + " 'SCIENTIFIC_CALIB_DATE': {'type': 'string',\n", + " 'long_name': 'Date of calibration',\n", + " 'conventions': 'YYYYMMDDHHMISS'},\n", + " 'HISTORY_INSTITUTION': {'type': 
'string',\n", + " 'long_name': 'Institution which performed action',\n", + " 'conventions': 'Argo reference table 4'},\n", + " 'HISTORY_STEP': {'type': 'string',\n", + " 'long_name': 'Step in data processing',\n", + " 'conventions': 'Argo reference table 12'},\n", + " 'HISTORY_SOFTWARE': {'type': 'string',\n", + " 'long_name': 'Name of software which performed action',\n", + " 'conventions': 'Institution dependent'},\n", + " 'HISTORY_SOFTWARE_RELEASE': {'type': 'string',\n", + " 'long_name': 'Version/release of software which performed action',\n", + " 'conventions': 'Institution dependent'},\n", + " 'HISTORY_REFERENCE': {'type': 'string',\n", + " 'long_name': 'Reference of database',\n", + " 'conventions': 'Institution dependent'},\n", + " 'HISTORY_DATE': {'type': 'string',\n", + " 'long_name': 'Date the history record was created',\n", + " 'conventions': 'YYYYMMDDHHMISS'},\n", + " 'HISTORY_ACTION': {'type': 'string',\n", + " 'long_name': 'Action performed on data',\n", + " 'conventions': 'Argo reference table 7'},\n", + " 'HISTORY_PARAMETER': {'type': 'string',\n", + " 'long_name': 'Station parameter action is performed on',\n", + " 'conventions': 'Argo reference table 3'},\n", + " 'HISTORY_START_PRES': {'type': 'float',\n", + " 'long_name': 'Start pressure action applied on',\n", + " 'units': 'decibar'},\n", + " 'HISTORY_STOP_PRES': {'type': 'float',\n", + " 'long_name': 'Stop pressure action applied on',\n", + " 'units': 'decibar'},\n", + " 'HISTORY_PREVIOUS_VALUE': {'type': 'float',\n", + " 'long_name': 'Parameter/Flag previous value before action'},\n", + " 'HISTORY_QCTEST': {'type': 'string',\n", + " 'long_name': 'Documentation of tests performed, tests failed (in hex form)',\n", + " 'conventions': 'Write tests performed when ACTION=QCP$; tests failed when ACTION=QCF$'},\n", + " 'filename': {'type': 'string'},\n", + " 'timestamp': {'type': 'int64'},\n", + " 'polygon': {'type': 'string'},\n", + " 'dataset_metadata': {'metadata_uuid': 
'4402cb50-e20a-44ee-93e6-4728259250d2',\n", + " 'title': 'Argo Core'}}" ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# parquet_meta = pa.parquet.read_schema(os.path.join(dname + '_common_metadata')) # parquet metadata\n", + "get_schema_metadata(dname) # schema metadata" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EzmbSF4oQdzq" + }, + "source": [ + "# Data Query and Plot" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "v6QvVVLsQdzq" + }, + "source": [ + "## Create a TIME and BoundingBox filter" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "SQ8m5afVQdzq" + }, + "outputs": [], + "source": [ + "filter_time = create_time_filter(parquet_ds, date_start='2018-12-01', date_end='2023-01-01')\n", + "filter_geo = create_bbox_filter(parquet_ds, lat_min=-34, lat_max=-28, lon_min=151, lon_max=160)\n", + "\n", + "\n", + "filter = filter_geo & filter_time" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "LYGO9pRwQdzq", + "outputId": "c889f934-ded6-4c19-d3a3-49580d933bba" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "id": "LYGO9pRwQdzq", - "outputId": "c889f934-ded6-4c19-d3a3-49580d933bba", - "colab": { - "base_uri": "https://localhost:8080/" - } - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\n", - "RangeIndex: 858669 entries, 0 to 858668\n", - "Data columns (total 67 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 PROJECT_NAME 858669 non-null object \n", - " 1 PI_NAME 858669 non-null object \n", - " 2 CYCLE_NUMBER 858669 non-null float64 \n", - " 3 DIRECTION 858669 non-null object \n", - " 4 DATA_CENTRE 858669 non-null object \n", - " 5 DC_REFERENCE 858669 non-null object \n", - " 6 DATA_STATE_INDICATOR 858669 non-null 
object \n", - " 7 DATA_MODE 858669 non-null object \n", - " 8 PLATFORM_TYPE 858669 non-null object \n", - " 9 FLOAT_SERIAL_NO 858669 non-null object \n", - " 10 FIRMWARE_VERSION 858669 non-null object \n", - " 11 WMO_INST_TYPE 858669 non-null object \n", - " 12 JULD 858669 non-null datetime64[ns]\n", - " 13 JULD_QC 858669 non-null object \n", - " 14 JULD_LOCATION 858669 non-null datetime64[ns]\n", - " 15 LATITUDE 858669 non-null float64 \n", - " 16 LONGITUDE 858669 non-null float64 \n", - " 17 POSITION_QC 858669 non-null object \n", - " 18 POSITIONING_SYSTEM 858669 non-null object \n", - " 19 PROFILE_PRES_QC 858669 non-null object \n", - " 20 PROFILE_TEMP_QC 858669 non-null object \n", - " 21 PROFILE_PSAL_QC 858669 non-null object \n", - " 22 VERTICAL_SAMPLING_SCHEME 858669 non-null object \n", - " 23 CONFIG_MISSION_NUMBER 858669 non-null float64 \n", - " 24 PRES 783293 non-null float32 \n", - " 25 PRES_QC 783293 non-null object \n", - " 26 PRES_ADJUSTED 782327 non-null float32 \n", - " 27 PRES_ADJUSTED_QC 783293 non-null object \n", - " 28 PRES_ADJUSTED_ERROR 685287 non-null float32 \n", - " 29 TEMP 783293 non-null float32 \n", - " 30 TEMP_QC 783293 non-null object \n", - " 31 TEMP_ADJUSTED 782263 non-null float32 \n", - " 32 TEMP_ADJUSTED_QC 783293 non-null object \n", - " 33 TEMP_ADJUSTED_ERROR 685223 non-null float32 \n", - " 34 PSAL 783293 non-null float32 \n", - " 35 PSAL_QC 783293 non-null object \n", - " 36 PSAL_ADJUSTED 724184 non-null float32 \n", - " 37 PSAL_ADJUSTED_QC 783293 non-null object \n", - " 38 PSAL_ADJUSTED_ERROR 627144 non-null float32 \n", - " 39 filename 858669 non-null object \n", - " 40 DATA_TYPE 0 non-null object \n", - " 41 FORMAT_VERSION 0 non-null object \n", - " 42 HANDBOOK_VERSION 0 non-null object \n", - " 43 REFERENCE_DATE_TIME 0 non-null object \n", - " 44 DATE_CREATION 0 non-null object \n", - " 45 DATE_UPDATE 0 non-null object \n", - " 46 STATION_PARAMETERS 0 non-null object \n", - " 47 PARAMETER 0 non-null object \n", - " 48 
SCIENTIFIC_CALIB_EQUATION 0 non-null object \n", - " 49 SCIENTIFIC_CALIB_COEFFICIENT 0 non-null object \n", - " 50 SCIENTIFIC_CALIB_COMMENT 0 non-null object \n", - " 51 SCIENTIFIC_CALIB_DATE 0 non-null object \n", - " 52 HISTORY_INSTITUTION 0 non-null object \n", - " 53 HISTORY_STEP 0 non-null object \n", - " 54 HISTORY_SOFTWARE 0 non-null object \n", - " 55 HISTORY_SOFTWARE_RELEASE 0 non-null object \n", - " 56 HISTORY_REFERENCE 0 non-null object \n", - " 57 HISTORY_DATE 0 non-null object \n", - " 58 HISTORY_ACTION 0 non-null object \n", - " 59 HISTORY_PARAMETER 0 non-null object \n", - " 60 HISTORY_START_PRES 0 non-null float32 \n", - " 61 HISTORY_STOP_PRES 0 non-null float32 \n", - " 62 HISTORY_PREVIOUS_VALUE 0 non-null float32 \n", - " 63 HISTORY_QCTEST 0 non-null object \n", - " 64 timestamp 858669 non-null category \n", - " 65 PLATFORM_NUMBER 858669 non-null category \n", - " 66 polygon 858669 non-null category \n", - "dtypes: category(3), datetime64[ns](2), float32(12), float64(4), object(46)\n", - "memory usage: 384.2+ MB\n", - "CPU times: user 43 s, sys: 2.4 s, total: 45.4 s\n", - "Wall time: 2min 4s\n" - ] - } - ], - "source": [ - "%%time\n", - "# using pandas instead of pyarrow so that filters can directly be applied to the data, and not just the partition\n", - "df = pd.read_parquet(dname, engine='pyarrow',filters=filter)\n", - "df.info()" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 1039011 entries, 0 to 1039010\n", + "Data columns (total 67 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 PROJECT_NAME 1039011 non-null object \n", + " 1 PI_NAME 1039011 non-null object \n", + " 2 CYCLE_NUMBER 1039011 non-null float64 \n", + " 3 DIRECTION 1039011 non-null object \n", + " 4 DATA_CENTRE 1039011 non-null object \n", + " 5 DC_REFERENCE 1039011 non-null object \n", + " 6 DATA_STATE_INDICATOR 1039011 non-null object \n", + " 7 DATA_MODE 1039011 non-null object \n", + " 
8 PLATFORM_TYPE 1039011 non-null object \n", + " 9 FLOAT_SERIAL_NO 1039011 non-null object \n", + " 10 FIRMWARE_VERSION 1039011 non-null object \n", + " 11 WMO_INST_TYPE 1039011 non-null object \n", + " 12 JULD 1039011 non-null datetime64[ns]\n", + " 13 JULD_QC 1039011 non-null object \n", + " 14 JULD_LOCATION 1039011 non-null datetime64[ns]\n", + " 15 LATITUDE 1039011 non-null float64 \n", + " 16 LONGITUDE 1039011 non-null float64 \n", + " 17 POSITION_QC 1039011 non-null object \n", + " 18 POSITIONING_SYSTEM 1039011 non-null object \n", + " 19 PROFILE_PRES_QC 1039011 non-null object \n", + " 20 PROFILE_TEMP_QC 1039011 non-null object \n", + " 21 VERTICAL_SAMPLING_SCHEME 1039011 non-null object \n", + " 22 CONFIG_MISSION_NUMBER 1039011 non-null float64 \n", + " 23 PRES 951351 non-null float32 \n", + " 24 PRES_QC 951351 non-null object \n", + " 25 PRES_ADJUSTED 950345 non-null float32 \n", + " 26 PRES_ADJUSTED_QC 951351 non-null object \n", + " 27 PRES_ADJUSTED_ERROR 934443 non-null float32 \n", + " 28 TEMP 951351 non-null float32 \n", + " 29 TEMP_QC 951351 non-null object \n", + " 30 TEMP_ADJUSTED 949907 non-null float32 \n", + " 31 TEMP_ADJUSTED_QC 951351 non-null object \n", + " 32 TEMP_ADJUSTED_ERROR 934005 non-null float32 \n", + " 33 filename 1039011 non-null object \n", + " 34 DATA_TYPE 0 non-null object \n", + " 35 FORMAT_VERSION 0 non-null object \n", + " 36 HANDBOOK_VERSION 0 non-null object \n", + " 37 REFERENCE_DATE_TIME 0 non-null object \n", + " 38 DATE_CREATION 0 non-null object \n", + " 39 DATE_UPDATE 0 non-null object \n", + " 40 STATION_PARAMETERS 0 non-null object \n", + " 41 PROFILE_PSAL_QC 1039011 non-null object \n", + " 42 PSAL 951351 non-null float32 \n", + " 43 PSAL_QC 951351 non-null object \n", + " 44 PSAL_ADJUSTED 886170 non-null float32 \n", + " 45 PSAL_ADJUSTED_QC 951351 non-null object \n", + " 46 PSAL_ADJUSTED_ERROR 870268 non-null float32 \n", + " 47 PARAMETER 0 non-null object \n", + " 48 SCIENTIFIC_CALIB_EQUATION 0 non-null object 
\n", + " 49 SCIENTIFIC_CALIB_COEFFICIENT 0 non-null object \n", + " 50 SCIENTIFIC_CALIB_COMMENT 0 non-null object \n", + " 51 SCIENTIFIC_CALIB_DATE 0 non-null object \n", + " 52 HISTORY_INSTITUTION 0 non-null object \n", + " 53 HISTORY_STEP 0 non-null object \n", + " 54 HISTORY_SOFTWARE 0 non-null object \n", + " 55 HISTORY_SOFTWARE_RELEASE 0 non-null object \n", + " 56 HISTORY_REFERENCE 0 non-null object \n", + " 57 HISTORY_DATE 0 non-null object \n", + " 58 HISTORY_ACTION 0 non-null object \n", + " 59 HISTORY_PARAMETER 0 non-null object \n", + " 60 HISTORY_START_PRES 0 non-null float32 \n", + " 61 HISTORY_STOP_PRES 0 non-null float32 \n", + " 62 HISTORY_PREVIOUS_VALUE 0 non-null float32 \n", + " 63 HISTORY_QCTEST 0 non-null object \n", + " 64 timestamp 1039011 non-null category \n", + " 65 PLATFORM_NUMBER 1039011 non-null category \n", + " 66 polygon 1039011 non-null category \n", + "dtypes: category(3), datetime64[ns](2), float32(12), float64(4), object(46)\n", + "memory usage: 465.2+ MB\n", + "CPU times: user 1min 51s, sys: 2.67 s, total: 1min 53s\n", + "Wall time: 2min 46s\n" + ] + } + ], + "source": [ + "%%time\n", + "# using pandas instead of pyarrow so that filters can directly be applied to the data, and not just the partition\n", + "df = pd.read_parquet(dname, engine='pyarrow',filters=filter)\n", + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 489 }, + "id": "0VG-PwmIQdzs", + "outputId": "fc029ab9-94e4-4c34-ba88-2e3241c5bf63" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "id": "0VG-PwmIQdzs", - "outputId": "fc029ab9-94e4-4c34-ba88-2e3241c5bf63", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 489 - } - }, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ] - }, - "metadata": {}, - "execution_count": 14 - }, - { - "output_type": "display_data", - 
"data": { - "text/plain": [ - "
" - ], - "image/png": "\n" - }, - "metadata": {} - } - ], - "source": [ - "df.plot.scatter(x='TEMP_ADJUSTED', y='PSAL_ADJUSTED', c='PRES_ADJUSTED', marker='+', linestyle=\"None\", cmap='RdYlBu_r', title='Temperature for each location')" + "data": { + "text/plain": [ + "" ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" }, { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "id": "geqOPVHIQdzt", - "outputId": "b9fc1aea-cb14-417b-b692-5a76563163e7", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 472 - } - }, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": [ - "
" - ], - "image/png": "\n" - }, - "metadata": {} - } - ], - "source": [ - "import matplotlib.pyplot as plt\n", - "\n", - "\n", - "filtered_df = df[df['PLATFORM_NUMBER'] == 5905506]\n", - "\n", - "# Get unique values of CYCLE_NUMBER\n", - "unique_cycle_numbers = filtered_df['CYCLE_NUMBER'].unique()\n", - "\n", - "# Define a dictionary to map each unique CYCLE_NUMBER to a color\n", - "color_mapping = {cycle_number: plt.cm.viridis_r(i / len(unique_cycle_numbers)) for i, cycle_number in enumerate(unique_cycle_numbers)}\n", - "\n", - "# Plot TEMP_ADJUSTED vs PRES_ADJUSTED with different colors for each line\n", - "for cycle_number, color in color_mapping.items():\n", - " cycle_df = filtered_df[filtered_df['CYCLE_NUMBER'] == cycle_number]\n", - " plt.plot(cycle_df['TEMP_ADJUSTED'], cycle_df['PRES_ADJUSTED'], color=color, label=f'Cycle {cycle_number}')\n", - "\n", - "plt.xlabel('Temperature Adjusted')\n", - "plt.ylabel('Pressure Adjusted')\n", - "plt.title('Temperature vs Pressure')\n", - "plt.legend()\n", - "plt.grid(True)\n", - "\n", - "# Reverse the y-axis\n", - "plt.gca().invert_yaxis()\n", - "\n", - "plt.show()" + "data": { + "image/png": "\n", + "text/plain": [ + "
" ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df.plot.scatter(x='TEMP_ADJUSTED', y='PSAL_ADJUSTED', c='PRES_ADJUSTED', marker='+', linestyle=\"None\", cmap='RdYlBu_r', title='Temperature for each location')" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 472 }, + "id": "geqOPVHIQdzt", + "outputId": "b9fc1aea-cb14-417b-b692-5a76563163e7" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "psp_kOBFQdzt" - }, - "source": [ - "## Create a TIME and scalar/number filter" + "data": { + "image/png": "\n", + "text/plain": [ + "
" ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "\n", + "filtered_df = df[df['PLATFORM_NUMBER'] == 5905506]\n", + "\n", + "# Get unique values of CYCLE_NUMBER\n", + "unique_cycle_numbers = filtered_df['CYCLE_NUMBER'].unique()\n", + "\n", + "# Define a dictionary to map each unique CYCLE_NUMBER to a color\n", + "color_mapping = {cycle_number: plt.cm.viridis_r(i / len(unique_cycle_numbers)) for i, cycle_number in enumerate(unique_cycle_numbers)}\n", + "\n", + "# Plot TEMP_ADJUSTED vs PRES_ADJUSTED with different colors for each line\n", + "for cycle_number, color in color_mapping.items():\n", + " cycle_df = filtered_df[filtered_df['CYCLE_NUMBER'] == cycle_number]\n", + " plt.plot(cycle_df['TEMP_ADJUSTED'], cycle_df['PRES_ADJUSTED'], color=color, label=f'Cycle {cycle_number}')\n", + "\n", + "plt.xlabel('Temperature Adjusted')\n", + "plt.ylabel('Pressure Adjusted')\n", + "plt.title('Temperature vs Pressure')\n", + "plt.legend()\n", + "plt.grid(True)\n", + "\n", + "# Reverse the y-axis\n", + "plt.gca().invert_yaxis()\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "psp_kOBFQdzt" + }, + "source": [ + "## Create a TIME and scalar/number filter" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "PtlD75nkQdzu" + }, + "outputs": [], + "source": [ + "filter_time = create_time_filter(parquet_ds, date_start='2006-07-12', date_end='2023-02-05')\n", + "\n", + "expr_1 = pc.field('PLATFORM_NUMBER') == 1901740\n", + "filter = expr_1 & filter_time" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "PQF8IgX9Qdzu", + "outputId": "8930b940-58f5-449d-eb21-3d723bf6c0c8" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "id": "PtlD75nkQdzu" - }, - "outputs": [], - "source": [ - "filter_time = 
create_time_filter(parquet_ds, date_start='2006-07-12', date_end='2023-02-05')\n", - "\n", - "expr_1 = pc.field('PLATFORM_NUMBER') == 1901740\n", - "filter = expr_1 & filter_time" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 150192 entries, 0 to 150191\n", + "Data columns (total 67 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 PROJECT_NAME 150192 non-null object \n", + " 1 PI_NAME 150192 non-null object \n", + " 2 CYCLE_NUMBER 150192 non-null float64 \n", + " 3 DIRECTION 150192 non-null object \n", + " 4 DATA_CENTRE 150192 non-null object \n", + " 5 DC_REFERENCE 150192 non-null object \n", + " 6 DATA_STATE_INDICATOR 150192 non-null object \n", + " 7 DATA_MODE 150192 non-null object \n", + " 8 PLATFORM_TYPE 150192 non-null object \n", + " 9 FLOAT_SERIAL_NO 150192 non-null object \n", + " 10 FIRMWARE_VERSION 150192 non-null object \n", + " 11 WMO_INST_TYPE 150192 non-null object \n", + " 12 JULD 150192 non-null datetime64[ns]\n", + " 13 JULD_QC 150192 non-null object \n", + " 14 JULD_LOCATION 150192 non-null datetime64[ns]\n", + " 15 LATITUDE 150192 non-null float64 \n", + " 16 LONGITUDE 150192 non-null float64 \n", + " 17 POSITION_QC 150192 non-null object \n", + " 18 POSITIONING_SYSTEM 150192 non-null object \n", + " 19 PROFILE_PRES_QC 150192 non-null object \n", + " 20 PROFILE_TEMP_QC 150192 non-null object \n", + " 21 VERTICAL_SAMPLING_SCHEME 150192 non-null object \n", + " 22 CONFIG_MISSION_NUMBER 150192 non-null float64 \n", + " 23 PRES 148328 non-null float32 \n", + " 24 PRES_QC 148328 non-null object \n", + " 25 PRES_ADJUSTED 148328 non-null float32 \n", + " 26 PRES_ADJUSTED_QC 148328 non-null object \n", + " 27 PRES_ADJUSTED_ERROR 148328 non-null float32 \n", + " 28 TEMP 148328 non-null float32 \n", + " 29 TEMP_QC 148328 non-null object \n", + " 30 TEMP_ADJUSTED 148320 non-null float32 \n", + " 31 TEMP_ADJUSTED_QC 148328 non-null object \n", + " 32 
TEMP_ADJUSTED_ERROR 148320 non-null float32 \n", + " 33 filename 150192 non-null object \n", + " 34 DATA_TYPE 0 non-null object \n", + " 35 FORMAT_VERSION 0 non-null object \n", + " 36 HANDBOOK_VERSION 0 non-null object \n", + " 37 REFERENCE_DATE_TIME 0 non-null object \n", + " 38 DATE_CREATION 0 non-null object \n", + " 39 DATE_UPDATE 0 non-null object \n", + " 40 STATION_PARAMETERS 0 non-null object \n", + " 41 PROFILE_PSAL_QC 150192 non-null object \n", + " 42 PSAL 148328 non-null float32 \n", + " 43 PSAL_QC 148328 non-null object \n", + " 44 PSAL_ADJUSTED 147969 non-null float32 \n", + " 45 PSAL_ADJUSTED_QC 148328 non-null object \n", + " 46 PSAL_ADJUSTED_ERROR 147969 non-null float32 \n", + " 47 PARAMETER 0 non-null object \n", + " 48 SCIENTIFIC_CALIB_EQUATION 0 non-null object \n", + " 49 SCIENTIFIC_CALIB_COEFFICIENT 0 non-null object \n", + " 50 SCIENTIFIC_CALIB_COMMENT 0 non-null object \n", + " 51 SCIENTIFIC_CALIB_DATE 0 non-null object \n", + " 52 HISTORY_INSTITUTION 0 non-null object \n", + " 53 HISTORY_STEP 0 non-null object \n", + " 54 HISTORY_SOFTWARE 0 non-null object \n", + " 55 HISTORY_SOFTWARE_RELEASE 0 non-null object \n", + " 56 HISTORY_REFERENCE 0 non-null object \n", + " 57 HISTORY_DATE 0 non-null object \n", + " 58 HISTORY_ACTION 0 non-null object \n", + " 59 HISTORY_PARAMETER 0 non-null object \n", + " 60 HISTORY_START_PRES 0 non-null float32 \n", + " 61 HISTORY_STOP_PRES 0 non-null float32 \n", + " 62 HISTORY_PREVIOUS_VALUE 0 non-null float32 \n", + " 63 HISTORY_QCTEST 0 non-null object \n", + " 64 timestamp 150192 non-null category \n", + " 65 PLATFORM_NUMBER 150192 non-null category \n", + " 66 polygon 150192 non-null category \n", + "dtypes: category(3), datetime64[ns](2), float32(12), float64(4), object(46)\n", + "memory usage: 67.7+ MB\n", + "CPU times: user 55.7 s, sys: 887 ms, total: 56.6 s\n", + "Wall time: 1min 39s\n" + ] + } + ], + "source": [ + "%%time\n", + "# using pandas instead of pyarrow so that filters can directly be 
applied to the data, and not just the partition\n", + "df = pd.read_parquet(dname, engine='pyarrow',filters=filter)\n", + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 489 }, + "id": "dvO7kLp1Qdzu", + "outputId": "3624827e-c2ca-4179-e20d-cdf8b2be6529" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 17, - "metadata": { - "id": "PQF8IgX9Qdzu", - "outputId": "8930b940-58f5-449d-eb21-3d723bf6c0c8", - "colab": { - "base_uri": "https://localhost:8080/" - } - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\n", - "RangeIndex: 150192 entries, 0 to 150191\n", - "Data columns (total 67 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 PROJECT_NAME 150192 non-null object \n", - " 1 PI_NAME 150192 non-null object \n", - " 2 CYCLE_NUMBER 150192 non-null float64 \n", - " 3 DIRECTION 150192 non-null object \n", - " 4 DATA_CENTRE 150192 non-null object \n", - " 5 DC_REFERENCE 150192 non-null object \n", - " 6 DATA_STATE_INDICATOR 150192 non-null object \n", - " 7 DATA_MODE 150192 non-null object \n", - " 8 PLATFORM_TYPE 150192 non-null object \n", - " 9 FLOAT_SERIAL_NO 150192 non-null object \n", - " 10 FIRMWARE_VERSION 150192 non-null object \n", - " 11 WMO_INST_TYPE 150192 non-null object \n", - " 12 JULD 150192 non-null datetime64[ns]\n", - " 13 JULD_QC 150192 non-null object \n", - " 14 JULD_LOCATION 150192 non-null datetime64[ns]\n", - " 15 LATITUDE 150192 non-null float64 \n", - " 16 LONGITUDE 150192 non-null float64 \n", - " 17 POSITION_QC 150192 non-null object \n", - " 18 POSITIONING_SYSTEM 150192 non-null object \n", - " 19 PROFILE_PRES_QC 150192 non-null object \n", - " 20 PROFILE_TEMP_QC 150192 non-null object \n", - " 21 PROFILE_PSAL_QC 150192 non-null object \n", - " 22 VERTICAL_SAMPLING_SCHEME 150192 non-null object \n", - " 23 CONFIG_MISSION_NUMBER 150192 
non-null float64 \n", - " 24 PRES 148328 non-null float32 \n", - " 25 PRES_QC 148328 non-null object \n", - " 26 PRES_ADJUSTED 148328 non-null float32 \n", - " 27 PRES_ADJUSTED_QC 148328 non-null object \n", - " 28 PRES_ADJUSTED_ERROR 140351 non-null float32 \n", - " 29 TEMP 148328 non-null float32 \n", - " 30 TEMP_QC 148328 non-null object \n", - " 31 TEMP_ADJUSTED 148320 non-null float32 \n", - " 32 TEMP_ADJUSTED_QC 148328 non-null object \n", - " 33 TEMP_ADJUSTED_ERROR 140343 non-null float32 \n", - " 34 PSAL 148328 non-null float32 \n", - " 35 PSAL_QC 148328 non-null object \n", - " 36 PSAL_ADJUSTED 147971 non-null float32 \n", - " 37 PSAL_ADJUSTED_QC 148328 non-null object \n", - " 38 PSAL_ADJUSTED_ERROR 139994 non-null float32 \n", - " 39 filename 150192 non-null object \n", - " 40 DATA_TYPE 0 non-null object \n", - " 41 FORMAT_VERSION 0 non-null object \n", - " 42 HANDBOOK_VERSION 0 non-null object \n", - " 43 REFERENCE_DATE_TIME 0 non-null object \n", - " 44 DATE_CREATION 0 non-null object \n", - " 45 DATE_UPDATE 0 non-null object \n", - " 46 STATION_PARAMETERS 0 non-null object \n", - " 47 PARAMETER 0 non-null object \n", - " 48 SCIENTIFIC_CALIB_EQUATION 0 non-null object \n", - " 49 SCIENTIFIC_CALIB_COEFFICIENT 0 non-null object \n", - " 50 SCIENTIFIC_CALIB_COMMENT 0 non-null object \n", - " 51 SCIENTIFIC_CALIB_DATE 0 non-null object \n", - " 52 HISTORY_INSTITUTION 0 non-null object \n", - " 53 HISTORY_STEP 0 non-null object \n", - " 54 HISTORY_SOFTWARE 0 non-null object \n", - " 55 HISTORY_SOFTWARE_RELEASE 0 non-null object \n", - " 56 HISTORY_REFERENCE 0 non-null object \n", - " 57 HISTORY_DATE 0 non-null object \n", - " 58 HISTORY_ACTION 0 non-null object \n", - " 59 HISTORY_PARAMETER 0 non-null object \n", - " 60 HISTORY_START_PRES 0 non-null float32 \n", - " 61 HISTORY_STOP_PRES 0 non-null float32 \n", - " 62 HISTORY_PREVIOUS_VALUE 0 non-null float32 \n", - " 63 HISTORY_QCTEST 0 non-null object \n", - " 64 timestamp 150192 non-null category \n", - " 
65 PLATFORM_NUMBER 150192 non-null category \n", - " 66 polygon 150192 non-null category \n", - "dtypes: category(3), datetime64[ns](2), float32(12), float64(4), object(46)\n", - "memory usage: 67.3+ MB\n", - "CPU times: user 23.8 s, sys: 647 ms, total: 24.5 s\n", - "Wall time: 1min 24s\n" - ] - } - ], - "source": [ - "%%time\n", - "# using pandas instead of pyarrow so that filters can directly be applied to the data, and not just the partition\n", - "df = pd.read_parquet(dname, engine='pyarrow',filters=filter)\n", - "df.info()" + "data": { + "text/plain": [ + "" ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" }, { - "cell_type": "code", - "execution_count": 18, - "metadata": { - "id": "dvO7kLp1Qdzu", - "outputId": "3624827e-c2ca-4179-e20d-cdf8b2be6529", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 489 - } - }, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ] - }, - "metadata": {}, - "execution_count": 18 - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "
" - ], - "image/png": "\n" - }, - "metadata": {} - } - ], - "source": [ - "df.plot.scatter(x='TEMP_ADJUSTED', y='PSAL_ADJUSTED', c='PRES_ADJUSTED', marker='+', linestyle=\"None\", cmap='RdYlBu_r', title='Temperature for each location')" + "data": { + "image/png": "\n", + "text/plain": [ + "
" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "tmq8RiggQdzv" - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.14" - }, - "colab": { - "provenance": [] + }, + "metadata": {}, + "output_type": "display_data" } + ], + "source": [ + "df.plot.scatter(x='TEMP_ADJUSTED', y='PSAL_ADJUSTED', c='PRES_ADJUSTED', marker='+', linestyle=\"None\", cmap='RdYlBu_r', title='Temperature for each location')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tmq8RiggQdzv" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" }, - "nbformat": 4, - "nbformat_minor": 0 + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 4 } diff --git a/notebooks/gsla_nrt.ipynb b/notebooks/gsla_nrt.ipynb index 312a67d..2fcfa43 100644 --- a/notebooks/gsla_nrt.ipynb +++ b/notebooks/gsla_nrt.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 6, + "execution_count": 1, "id": "67dd6387-5e3e-4a9d-8249-5a21fe2111b5", "metadata": {}, "outputs": [], @@ -13,7 +13,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 2, "id": "2a6964b2-b36a-4699-8822-f92ca90c554b", "metadata": {}, "outputs": [ @@ -383,22 +383,21 @@ " stroke: 
currentColor;\n", " fill: currentColor;\n", "}\n", - "
<xarray.Dataset> Size: 1GB\n",
-       "Dimensions:     (TIME: 115, LATITUDE: 351, LONGITUDE: 641)\n",
+       "
<xarray.Dataset> Size: 2GB\n",
+       "Dimensions:     (TIME: 180, LATITUDE: 351, LONGITUDE: 641)\n",
        "Coordinates:\n",
        "  * LATITUDE    (LATITUDE) float64 3kB -60.0 -59.8 -59.6 -59.4 ... 9.6 9.8 10.0\n",
        "  * LONGITUDE   (LONGITUDE) float64 5kB 57.0 57.2 57.4 ... 184.6 184.8 185.0\n",
-       "  * TIME        (TIME) datetime64[ns] 920B 2024-01-01 ... 2024-04-20T06:00:00\n",
+       "  * TIME        (TIME) datetime64[ns] 1kB 2024-01-01 ... 2024-06-24T06:00:00\n",
        "Data variables:\n",
-       "    GSL         (TIME, LATITUDE, LONGITUDE) float64 207MB dask.array<chunksize=(1, 350, 640), meta=np.ndarray>\n",
-       "    GSLA        (TIME, LATITUDE, LONGITUDE) float64 207MB dask.array<chunksize=(1, 350, 640), meta=np.ndarray>\n",
-       "    UCUR        (TIME, LATITUDE, LONGITUDE) float64 207MB dask.array<chunksize=(1, 350, 640), meta=np.ndarray>\n",
-       "    UCUR_MEAN   (TIME, LATITUDE, LONGITUDE) float64 207MB dask.array<chunksize=(1, 350, 640), meta=np.ndarray>\n",
-       "    VCUR        (TIME, LATITUDE, LONGITUDE) float64 207MB dask.array<chunksize=(1, 350, 640), meta=np.ndarray>\n",
-       "    VCUR_MEAN   (TIME, LATITUDE, LONGITUDE) float64 207MB dask.array<chunksize=(1, 350, 640), meta=np.ndarray>\n",
-       "    end_time    (TIME) datetime64[ns] 920B dask.array<chunksize=(1,), meta=np.ndarray>\n",
-       "    filename    (TIME) <U54 25kB dask.array<chunksize=(1,), meta=np.ndarray>\n",
-       "    start_time  (TIME) datetime64[ns] 920B dask.array<chunksize=(1,), meta=np.ndarray>\n",
+       "    GSL         (TIME, LATITUDE, LONGITUDE) float64 324MB dask.array<chunksize=(5, 351, 641), meta=np.ndarray>\n",
+       "    GSLA        (TIME, LATITUDE, LONGITUDE) float64 324MB dask.array<chunksize=(5, 351, 641), meta=np.ndarray>\n",
+       "    UCUR        (TIME, LATITUDE, LONGITUDE) float64 324MB dask.array<chunksize=(5, 351, 641), meta=np.ndarray>\n",
+       "    UCUR_MEAN   (TIME, LATITUDE, LONGITUDE) float64 324MB dask.array<chunksize=(5, 351, 641), meta=np.ndarray>\n",
+       "    VCUR        (TIME, LATITUDE, LONGITUDE) float64 324MB dask.array<chunksize=(5, 351, 641), meta=np.ndarray>\n",
+       "    VCUR_MEAN   (TIME, LATITUDE, LONGITUDE) float64 324MB dask.array<chunksize=(5, 351, 641), meta=np.ndarray>\n",
+       "    end_time    (TIME) datetime64[ns] 1kB dask.array<chunksize=(5,), meta=np.ndarray>\n",
+       "    start_time  (TIME) datetime64[ns] 1kB dask.array<chunksize=(5,), meta=np.ndarray>\n",
        "Attributes: (12/35)\n",
        "    Conventions:                   CF-1.6,IMOS-1.4\n",
        "    abstract:                      Gridded (adjusted) sea level anomaly (GSLA...\n",
@@ -410,11 +409,11 @@
        "    project:                       Integrated Marine Observing System (IMOS)\n",
        "    references:                    http://imos.aodn.org.au/oceancurrent\n",
        "    standard_name_vocabulary:      NetCDF Climate and Forecast (CF) Metadata ...\n",
-       "    time_coverage_end:             2024-04-22T20:04:38Z\n",
-       "    time_coverage_start:           2024-03-28T08:04:51Z\n",
-       "    title:                         Gridded Sea Level Anomaly - Australia Region
  • Conventions :
    CF-1.6
    Metadata_Conventions :
    Unidata Dataset Discovery v1.0
    Metadata_Link :
    TBA
    acknowledgment :
    Any use of these data requires the following acknowledgment:HRPT AVHRR SSTskin retrievals were produced by the Australian Bureau of Meteorology as a contribution to the Integrated Marine Observing System - an initiative of the Australian Government being conducted as part of the National Collaborative Research Infrastructure Strategy and the Super Science Initiative. The imagery data were acquired from NOAA spacecraft by the Bureau, Australian Institute of Marine Science, Australian Commonwealth Scientific and Industrial Research Organization, Geoscience Australia, and Western Australian Satellite Technology and Applications Consortium.
    cdm_data_type :
    grid
    comment :
    HRPT AVHRR experimental L3 retrieval produced by the Australian Bureau of Meteorology as a contribution to the Integrated Marine Observing System. SSTs were calibrated to drifting buoy depths (~20-30cm) followed by a cool skin correction of -0.17K to convert to a skin (~10 micron) SST. SSTs are a weighted average of the SSTs of contributing pixels (weighted by sses_standard_deviation^-2).\\nWARNING: some applications are unable to properly handle signed byte values. If byte values >127 are encountered, subtract 256 from this reported value. GRID:CONTINENTAL, SYSCODE:PRODUCTION
    creator_email :
    ghrsst@bom.gov.au
    creator_name :
    Australian Bureau of Meteorology
    creator_url :
    http://www.imos.org.au/srs.html
    date_created :
    20240213T034943Z
    easternmost_longitude :
    -170.00999450683594
    file_quality_level :
    3
    gds_version_id :
    2.0r4
    geospatial_lat_resolution :
    0.019999999552965164
    geospatial_lat_units :
    degrees_north
    geospatial_lon_resolution :
    0.019999999552965164
    geospatial_lon_units :
    degrees_east
    history :
    platform_counts=NOAA-18=2,quality_counts=archive=2,platform=NOAA-18,quality_source=archive,ice_source=SSMI-NCEP-Analysis-ICE-5min,adi_source=unknown,wind_source=ACCESSG-ABOM-Forecast-WSP,analysis_source=ABOM-L4LRfnd-GLOB-GAMSSA_28km,source_file=20240210152000-ABOM-L3C_GHRSST-SSTskin-AVHRR18_D-1d_night-v02.0-fv01.0.nc;20240210032000-ABOM-L3C_GHRSST-SSTskin-AVHRR18_D-1d_day-v02.0-fv01.0.nc,l3_file=20240210152000-ABOM-L3C_GHRSST-SSTskin-AVHRR18_D-1d_night-v02.0-fv01.0.nc;20240210032000-ABOM-L3C_GHRSST-SSTskin-AVHRR18_D-1d_day-v02.0-fv01.0.nc,l3_source=AVHRR18_D-ABOM-L3C-v01.0,global_source=wind_source=ACCESSG-ABOM-Forecast-WSP,analysis_source=ABOM-L4LRfnd-GLOB-GAMSSA_28km,adi_source=unknown,ice_source=SSMI-NCEP-Analysis-ICE-5min,l3_source=AVHRR18_D-ABOM-L3C-v01.0,landmask_file=lsmask.dist5.5.nc,landmask_reference=Naval Oceanographic Office (NAVOCEANO),landmask_URL=https://www.ghrsst.org/data/ghrsst-data-tools/navo-ghrsst-pp-land-sea-mask/,landmask_source=NAVOCEANO 1km Version 5.5,ice_reference=US National Weather Service - NCEP,ice_URL=http://polar.ncep.noaa.gov/seaice/Analyses.html,ice_file=20240209.ice_data.5min.nc,ice_jdate=2460350,merge_tool=mergeL3U,mergeL3U_version=9042,quality=archive,mergeL3U_quality=archive
    id :
    AVHRR_D-ABOM-L3S-v01.0
    institution :
    ABOM
    keywords :
    Oceans > Ocean Temperature > Sea Surface Temperature
    keywords_vocabulary :
    NASA Global Change Master Directory (GCMD) Science Keywords
    license :
    GHRSST protocol describes data use as free and open
    naming_authority :
    org.ghrsst
    netcdf_version_id :
    4.3.3.1
    northernmost_latitude :
    19.989999771118164
    platform :
    NOAA-18
    processing_level :
    L3S
    product_version :
    01.0
    project :
    Group for High Resolution Sea Surface Temperature
    publisher_email :
    gpa@ghrsst.org
    publisher_name :
    The GHRSST Project Office
    publisher_url :
    http://www.ghrsst.org
    references :
    http://imos.org.au/sstproducts.html and Griffin et al. (2017) at http://imos.org.au/facilities/srs/sstproducts/sstdata0/sstdata-references
    sensor :
    VIIRS,AVHRR
    source :
    wind_source=ACCESSG-ABOM-Forecast-WSP,analysis_source=ABOM-L4LRfnd-GLOB-GAMSSA_28km,adi_source=unknown,ice_source=SSMI-NCEP-Analysis-ICE-5min,l3_source=AVHRR18_D-ABOM-L3C-v01.0
    southernmost_latitude :
    -69.98999786376953
    spatial_resolution :
    0.02 deg
    standard_name_vocabulary :
    NetCDF Climate and Forecast (CF) Metadata Convention
    start_time :
    20240210T002953Z
    stop_time :
    20240210T163514Z
    summary :
    Skin SST retrievals produced from stitching together High Resolution Picture Transmission direct broadcast data from a NOAA polar-orbiting satellite received at Australian receiving stations.
    time_coverage_end :
    20240210T163514Z
    time_coverage_start :
    20240210T002953Z
    title :
    IMOS L3S Day and Night gridded multiple-sensor multiple-swath Australian region foundation SST
    uuid :
    b21e26ac-1562-42b6-840b-43af2aa7c11b
    westernmost_longitude :
    70.01000213623047
  • " ], "text/plain": [ - " Size: 202GB\n", - "Dimensions: (time: 104, lat: 4500, lon: 6000)\n", + " Size: 119GB\n", + "Dimensions: (time: 50, lat: 4500, lon: 6000)\n", "Coordinates:\n", - " * lat (lat) float32 18kB 19.99 19.97 ... -69.99\n", - " * lon (lon) float32 24kB 70.01 70.03 ... 190.0\n", - " * time (time) datetime64[ns] 832B 2024-01-01T...\n", - "Data variables: (12/18)\n", - " dt_analysis (time, lat, lon) float32 11GB dask.array\n", - " filename (time) \n", - " l2p_flags (time, lat, lon) float32 11GB dask.array\n", - " quality_level (time, lat, lon) float32 11GB dask.array\n", - " satellite_zenith_angle (time, lat, lon) float32 11GB dask.array\n", - " sea_ice_fraction (time, lat, lon) float32 11GB dask.array\n", - " ... ...\n", - " sst_count (time, lat, lon) float32 11GB dask.array\n", - " sst_dtime (time, lat, lon) float64 22GB dask.array\n", - " sst_mean (time, lat, lon) float32 11GB dask.array\n", - " sst_standard_deviation (time, lat, lon) float32 11GB dask.array\n", - " wind_speed (time, lat, lon) float32 11GB dask.array\n", - " wind_speed_dtime_from_sst (time, lat, lon) float32 11GB dask.array\n", + " * lat (lat) float32 18kB 19.99 19.97 ... -69.97 -69.99\n", + " * lon (lon) float32 24kB 70.01 70.03 ... 
190.0 190.0\n", + " * time (time) datetime64[ns] 400B 2024-01-01T09:20:00 ....\n", + "Data variables:\n", + " dt_analysis (time, lat, lon) float64 11GB dask.array\n", + " l2p_flags (time, lat, lon) float32 5GB dask.array\n", + " quality_level (time, lat, lon) float32 5GB dask.array\n", + " satellite_zenith_angle (time, lat, lon) float64 11GB dask.array\n", + " sea_surface_temperature (time, lat, lon) float64 11GB dask.array\n", + " sses_bias (time, lat, lon) float64 11GB dask.array\n", + " sses_count (time, lat, lon) float64 11GB dask.array\n", + " sses_standard_deviation (time, lat, lon) float64 11GB dask.array\n", + " sst_count (time, lat, lon) float64 11GB dask.array\n", + " sst_dtime (time, lat, lon) float64 11GB dask.array\n", + " sst_mean (time, lat, lon) float64 11GB dask.array\n", + " sst_standard_deviation (time, lat, lon) float64 11GB dask.array\n", "Attributes: (12/47)\n", " Conventions: CF-1.6\n", " Metadata_Conventions: Unidata Dataset Discovery v1.0\n", @@ -2866,14 +1902,14 @@ " comment: HRPT AVHRR experimental L3 retrieval produced...\n", " ... 
...\n", " summary: Skin SST retrievals produced from stitching t...\n", - " time_coverage_end: 20240413T165456Z\n", - " time_coverage_start: 20240412T232018Z\n", + " time_coverage_end: 20240210T163514Z\n", + " time_coverage_start: 20240210T002953Z\n", " title: IMOS L3S Day and Night gridded multiple-senso...\n", - " uuid: 0486b68d-00cc-4643-bbdf-831627b18798\n", + " uuid: b21e26ac-1562-42b6-840b-43af2aa7c11b\n", " westernmost_longitude: 70.01000213623047" ] }, - "execution_count": 2, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -2881,32 +1917,32 @@ "source": [ "# remote zarr dataset\n", "dataset_name=\"srs_l3s_1d_dn\"\n", - "url = f's3://imos-data-lab-optimised/parquet/loz_test/{dataset_name}.zarr/'\n", + "url = f's3://imos-data-lab-optimised/cloud_optimised/cluster_testing/{dataset_name}.zarr/'\n", "ds = xr.open_zarr(fsspec.get_mapper(url, anon=True), consolidated=True)\n", "ds" ] }, { "cell_type": "code", - "execution_count": 3, - "id": "6f1e788f-7516-444d-8a7c-6cdf18f62928", + "execution_count": 4, + "id": "9a0a9794-547f-46bd-b015-9f0dd53cd8d7", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "[]" ] }, - "execution_count": 3, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" }, { "data": { - "image/png": "", + "image/png": "\n", "text/plain": [ - "
    " + "
    " ] }, "metadata": {}, @@ -2914,30 +1950,95 @@ } ], "source": [ - "ds.sea_surface_temperature.sel(time='2024-01-02', lon=slice(120, 150), lat=slice(-30, -50)).plot()" + "ds.sea_surface_temperature.sel(lat=-40, lon=130, method='nearest').plot()" ] }, { "cell_type": "code", - "execution_count": 4, - "id": "9a0a9794-547f-46bd-b015-9f0dd53cd8d7", + "execution_count": 5, + "id": "d68a2b14-96ee-4e3a-bbc6-306b79dfccdf", + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "import traceback\n", + "\n", + "def plot_sst(ds, start_date, lon_slice, lat_slice):\n", + " \"\"\"\n", + " Plots SST data for 6 consecutive days starting from start_date.\n", + "\n", + " Parameters:\n", + " - ds: xarray.Dataset containing the SST data.\n", + " - start_date: str, start date in 'YYYY-MM-DD' format.\n", + " - lon_slice: tuple, longitude slice (start_lon, end_lon).\n", + " - lat_slice: tuple, latitude slice (start_lat, end_lat).\n", + " \"\"\"\n", + " # Parse the start date\n", + " start_date_parsed = pd.to_datetime(start_date)\n", + "\n", + " # Ensure the dataset has a time dimension and it's sorted\n", + " assert 'time' in ds.dims, \"Dataset does not have a 'time' dimension\"\n", + " ds = ds.sortby('time')\n", + " \n", + " # Find the nearest date in the dataset\n", + " nearest_date = ds.sel(time=start_date_parsed, method='nearest').time\n", + " \n", + " # Get the index of the nearest date\n", + " nearest_date_index = ds.time.where(ds.time == nearest_date, drop=True).squeeze().values\n", + " \n", + " # Find the position of the nearest date in the time array\n", + " nearest_date_position = int((ds.time == nearest_date_index).argmax().values)\n", + " \n", + " # Get the next 6 date values including the nearest date\n", + " dates = ds.time[nearest_date_position:nearest_date_position + 6].values\n", + " dates = [pd.Timestamp(date) for date in dates]\n", + "\n", + " print(dates)\n", + " # Create subplots\n", + " fig, axes = 
plt.subplots(nrows=2, ncols=3, figsize=(18, 10))\n", + " axes = axes.flatten()\n", + "\n", + " # Plot SST for each date\n", + " for ax, date in zip(axes, dates):\n", + " try:\n", + " sst_data_kelvin = ds.sea_surface_temperature.sel(time=date.strftime('%Y-%m-%d'), lon=slice(lon_slice[0], lon_slice[1]), lat=slice(lat_slice[0], lat_slice[1]))\n", + " \n", + " # Convert Kelvin to Celsius for plotting\n", + " sst_data_celsius = sst_data_kelvin - 273.15\n", + "\n", + " sst_data_celsius.plot(ax=ax, cmap='coolwarm', cbar_kwargs={'label': 'SST (°C)'}) # Using 'coolwarm' colormap\n", + " ax.set_title(date.strftime('%Y-%m-%d'))\n", + " except KeyError:\n", + " # Print traceback for the KeyError\n", + " #traceback.print_exc()\n", + " # Handle the case where data for a specific date is not available\n", + " ax.set_title(f\"No data for {date.strftime('%Y-%m-%d')}\")\n", + " ax.axis('off')\n", + "\n", + " # Adjust layout\n", + " plt.tight_layout()\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "00f5e4b0-3ac1-4c4c-bd80-873ecf477023", "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "[]" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "[Timestamp('2024-01-02 09:20:00'), Timestamp('2024-01-03 09:20:00'), Timestamp('2024-01-04 09:20:00'), Timestamp('2024-01-05 09:20:00'), Timestamp('2024-01-06 09:20:00'), Timestamp('2024-01-07 09:20:00')]\n" + ] }, { "data": { - "image/png": "", + "image/png": "\n", "text/plain": [ - "
    " + "
    " ] }, "metadata": {}, @@ -2945,13 +2046,13 @@ } ], "source": [ - "ds.sea_surface_temperature.sel(lat=-40, lon=130, method='nearest').plot()" + "plot_sst(ds, start_date='2024-01-02', lon_slice=(120, 150), lat_slice=(-30, -50))" ] }, { "cell_type": "code", "execution_count": null, - "id": "502c1c78-b0a3-4954-ba37-7d603606e594", + "id": "a784a4cf-f50c-4b7c-889b-c330f9b8a1d2", "metadata": {}, "outputs": [], "source": [] diff --git a/poetry.lock b/poetry.lock index 60503e2..89317c9 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2,24 +2,24 @@ [[package]] name = "aiobotocore" -version = "2.13.0" +version = "2.13.1" description = "Async client for aws services using botocore and aiohttp" optional = false python-versions = ">=3.8" files = [ - {file = "aiobotocore-2.13.0-py3-none-any.whl", hash = "sha256:f812afc678d71b0038fd1ce712ff111ab7f47bab81ce5b4c7d222d4b83bc0cb2"}, - {file = "aiobotocore-2.13.0.tar.gz", hash = "sha256:4badf5cab6ad400216319d14278e2c99ad9b708e28a0f231605a412e632de401"}, + {file = "aiobotocore-2.13.1-py3-none-any.whl", hash = "sha256:1bef121b99841ee3cc788e4ed97c332ba32353b1f00e886d1beb3aae95520858"}, + {file = "aiobotocore-2.13.1.tar.gz", hash = "sha256:134f9606c2f91abde38cbc61c3241113e26ff244633e0c31abb7e09da3581c9b"}, ] [package.dependencies] aiohttp = ">=3.9.2,<4.0.0" aioitertools = ">=0.5.1,<1.0.0" -botocore = ">=1.34.70,<1.34.107" +botocore = ">=1.34.70,<1.34.132" wrapt = ">=1.10.10,<2.0.0" [package.extras] -awscli = ["awscli (>=1.32.70,<1.32.107)"] -boto3 = ["boto3 (>=1.34.70,<1.34.107)"] +awscli = ["awscli (>=1.32.70,<1.33.14)"] +boto3 = ["boto3 (>=1.34.70,<1.34.132)"] [[package]] name = "aiohttp" @@ -142,6 +142,61 @@ files = [ [package.dependencies] frozenlist = ">=1.1.0" +[[package]] +name = "alabaster" +version = "0.7.16" +description = "A light, configurable Sphinx theme" +optional = false +python-versions = ">=3.9" +files = [ + {file = "alabaster-0.7.16-py3-none-any.whl", hash = 
"sha256:b46733c07dce03ae4e150330b975c75737fa60f0a7c591b6c8bf4928a28e2c92"}, + {file = "alabaster-0.7.16.tar.gz", hash = "sha256:75a8b99c28a5dad50dd7f8ccdd447a121ddb3892da9e53d1ca5cca3106d58d65"}, +] + +[[package]] +name = "annotated-types" +version = "0.7.0" +description = "Reusable constraint types to use with typing.Annotated" +optional = false +python-versions = ">=3.8" +files = [ + {file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"}, + {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"}, +] + +[[package]] +name = "antlr4-python3-runtime" +version = "4.13.1" +description = "ANTLR 4.13.1 runtime for Python 3" +optional = false +python-versions = "*" +files = [ + {file = "antlr4-python3-runtime-4.13.1.tar.gz", hash = "sha256:3cd282f5ea7cfb841537fe01f143350fdb1c0b1ce7981443a2fa8513fddb6d1a"}, + {file = "antlr4_python3_runtime-4.13.1-py3-none-any.whl", hash = "sha256:78ec57aad12c97ac039ca27403ad61cb98aaec8a3f9bb8144f889aa0fa28b943"}, +] + +[[package]] +name = "anyio" +version = "4.4.0" +description = "High level compatibility layer for multiple asynchronous event loop implementations" +optional = false +python-versions = ">=3.8" +files = [ + {file = "anyio-4.4.0-py3-none-any.whl", hash = "sha256:c1b2d8f46a8a812513012e1107cb0e68c17159a7a594208005a57dc776e1bdc7"}, + {file = "anyio-4.4.0.tar.gz", hash = "sha256:5aadc6a1bbb7cdb0bede386cac5e2940f5e2ff3aa20277e991cf028e0585ce94"}, +] + +[package.dependencies] +exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""} +idna = ">=2.8" +sniffio = ">=1.1" +typing-extensions = {version = ">=4.1", markers = "python_version < \"3.11\""} + +[package.extras] +doc = ["Sphinx (>=7)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"] +test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil 
(>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17)"] +trio = ["trio (>=0.23)"] + [[package]] name = "appnope" version = "0.1.4" @@ -193,6 +248,55 @@ tests = ["attrs[tests-no-zope]", "zope-interface"] tests-mypy = ["mypy (>=1.6)", "pytest-mypy-plugins"] tests-no-zope = ["attrs[tests-mypy]", "cloudpickle", "hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist[psutil]"] +[[package]] +name = "aws-sam-translator" +version = "1.89.0" +description = "AWS SAM Translator is a library that transform SAM templates into AWS CloudFormation templates" +optional = false +python-versions = "!=4.0,<=4.0,>=3.8" +files = [ + {file = "aws_sam_translator-1.89.0-py3-none-any.whl", hash = "sha256:843be1b5ca7634f700ad0c844a7e0dc42858f35da502e91691473eadd1731ded"}, + {file = "aws_sam_translator-1.89.0.tar.gz", hash = "sha256:fff1005d0b1f3cb511d0ac7e85f54af06afc9d9e433df013a2338d7a0168d174"}, +] + +[package.dependencies] +boto3 = ">=1.19.5,<2.dev0" +jsonschema = ">=3.2,<5" +pydantic = ">=1.8,<3" +typing-extensions = ">=4.4" + +[package.extras] +dev = ["black (==24.3.0)", "boto3 (>=1.23,<2)", "boto3-stubs[appconfig,serverlessrepo] (>=1.19.5,<2.dev0)", "coverage (>=5.3,<8)", "dateparser (>=1.1,<2.0)", "mypy (>=1.3.0,<1.4.0)", "parameterized (>=0.7,<1.0)", "pytest (>=6.2,<8)", "pytest-cov (>=2.10,<5)", "pytest-env (>=0.6,<1)", "pytest-rerunfailures (>=9.1,<12)", "pytest-xdist (>=2.5,<4)", "pyyaml (>=6.0,<7.0)", "requests (>=2.28,<3.0)", "ruamel.yaml (==0.17.21)", "ruff (>=0.1.0,<0.2.0)", "tenacity (>=8.0,<9.0)", "types-PyYAML (>=6.0,<7.0)", "types-jsonschema (>=3.2,<4.0)"] + +[[package]] +name = "aws-xray-sdk" +version = "2.14.0" +description = "The AWS X-Ray SDK for Python (the SDK) enables Python developers to record and emit information from within their applications to the AWS X-Ray service." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "aws_xray_sdk-2.14.0-py2.py3-none-any.whl", hash = "sha256:cfbe6feea3d26613a2a869d14c9246a844285c97087ad8f296f901633554ad94"}, + {file = "aws_xray_sdk-2.14.0.tar.gz", hash = "sha256:aab843c331af9ab9ba5cefb3a303832a19db186140894a523edafc024cc0493c"}, +] + +[package.dependencies] +botocore = ">=1.11.3" +wrapt = "*" + +[[package]] +name = "babel" +version = "2.15.0" +description = "Internationalization utilities" +optional = false +python-versions = ">=3.8" +files = [ + {file = "Babel-2.15.0-py3-none-any.whl", hash = "sha256:08706bdad8d0a3413266ab61bd6c34d0c28d6e1e7badf40a2cebe67644e2e1fb"}, + {file = "babel-2.15.0.tar.gz", hash = "sha256:8daf0e265d05768bc6c7a314cf1321e9a123afc328cc635c18622a2f30a04413"}, +] + +[package.extras] +dev = ["freezegun (>=1.0,<2.0)", "pytest (>=6.0)", "pytest-cov"] + [[package]] name = "backcall" version = "0.2.0" @@ -204,15 +308,77 @@ files = [ {file = "backcall-0.2.0.tar.gz", hash = "sha256:5cbdbf27be5e7cfadb448baf0aa95508f91f2bbc6c6437cd9cd06e2a4c215e1e"}, ] +[[package]] +name = "backoff" +version = "2.2.1" +description = "Function decoration for backoff and retry" +optional = false +python-versions = ">=3.7,<4.0" +files = [ + {file = "backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8"}, + {file = "backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba"}, +] + +[[package]] +name = "bcrypt" +version = "4.1.3" +description = "Modern password hashing for your software and your servers" +optional = false +python-versions = ">=3.7" +files = [ + {file = "bcrypt-4.1.3-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:48429c83292b57bf4af6ab75809f8f4daf52aa5d480632e53707805cc1ce9b74"}, + {file = "bcrypt-4.1.3-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4a8bea4c152b91fd8319fef4c6a790da5c07840421c2b785084989bf8bbb7455"}, + {file = 
"bcrypt-4.1.3-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d3b317050a9a711a5c7214bf04e28333cf528e0ed0ec9a4e55ba628d0f07c1a"}, + {file = "bcrypt-4.1.3-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:094fd31e08c2b102a14880ee5b3d09913ecf334cd604af27e1013c76831f7b05"}, + {file = "bcrypt-4.1.3-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:4fb253d65da30d9269e0a6f4b0de32bd657a0208a6f4e43d3e645774fb5457f3"}, + {file = "bcrypt-4.1.3-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:193bb49eeeb9c1e2db9ba65d09dc6384edd5608d9d672b4125e9320af9153a15"}, + {file = "bcrypt-4.1.3-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:8cbb119267068c2581ae38790e0d1fbae65d0725247a930fc9900c285d95725d"}, + {file = "bcrypt-4.1.3-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:6cac78a8d42f9d120b3987f82252bdbeb7e6e900a5e1ba37f6be6fe4e3848286"}, + {file = "bcrypt-4.1.3-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:01746eb2c4299dd0ae1670234bf77704f581dd72cc180f444bfe74eb80495b64"}, + {file = "bcrypt-4.1.3-cp37-abi3-win32.whl", hash = "sha256:037c5bf7c196a63dcce75545c8874610c600809d5d82c305dd327cd4969995bf"}, + {file = "bcrypt-4.1.3-cp37-abi3-win_amd64.whl", hash = "sha256:8a893d192dfb7c8e883c4576813bf18bb9d59e2cfd88b68b725990f033f1b978"}, + {file = "bcrypt-4.1.3-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:0d4cf6ef1525f79255ef048b3489602868c47aea61f375377f0d00514fe4a78c"}, + {file = "bcrypt-4.1.3-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f5698ce5292a4e4b9e5861f7e53b1d89242ad39d54c3da451a93cac17b61921a"}, + {file = "bcrypt-4.1.3-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec3c2e1ca3e5c4b9edb94290b356d082b721f3f50758bce7cce11d8a7c89ce84"}, + {file = "bcrypt-4.1.3-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:3a5be252fef513363fe281bafc596c31b552cf81d04c5085bc5dac29670faa08"}, + {file = "bcrypt-4.1.3-cp39-abi3-manylinux_2_28_x86_64.whl", hash = 
"sha256:5f7cd3399fbc4ec290378b541b0cf3d4398e4737a65d0f938c7c0f9d5e686611"}, + {file = "bcrypt-4.1.3-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:c4c8d9b3e97209dd7111bf726e79f638ad9224b4691d1c7cfefa571a09b1b2d6"}, + {file = "bcrypt-4.1.3-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:31adb9cbb8737a581a843e13df22ffb7c84638342de3708a98d5c986770f2834"}, + {file = "bcrypt-4.1.3-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:551b320396e1d05e49cc18dd77d970accd52b322441628aca04801bbd1d52a73"}, + {file = "bcrypt-4.1.3-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:6717543d2c110a155e6821ce5670c1f512f602eabb77dba95717ca76af79867d"}, + {file = "bcrypt-4.1.3-cp39-abi3-win32.whl", hash = "sha256:6004f5229b50f8493c49232b8e75726b568535fd300e5039e255d919fc3a07f2"}, + {file = "bcrypt-4.1.3-cp39-abi3-win_amd64.whl", hash = "sha256:2505b54afb074627111b5a8dc9b6ae69d0f01fea65c2fcaea403448c503d3991"}, + {file = "bcrypt-4.1.3-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:cb9c707c10bddaf9e5ba7cdb769f3e889e60b7d4fea22834b261f51ca2b89fed"}, + {file = "bcrypt-4.1.3-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:9f8ea645eb94fb6e7bea0cf4ba121c07a3a182ac52876493870033141aa687bc"}, + {file = "bcrypt-4.1.3-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:f44a97780677e7ac0ca393bd7982b19dbbd8d7228c1afe10b128fd9550eef5f1"}, + {file = "bcrypt-4.1.3-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:d84702adb8f2798d813b17d8187d27076cca3cd52fe3686bb07a9083930ce650"}, + {file = "bcrypt-4.1.3.tar.gz", hash = "sha256:2ee15dd749f5952fe3f0430d0ff6b74082e159c50332a1413d51b5689cf06623"}, +] + +[package.extras] +tests = ["pytest (>=3.2.1,!=3.3.0)"] +typecheck = ["mypy"] + +[[package]] +name = "blinker" +version = "1.8.2" +description = "Fast, simple object-to-object and broadcast signaling" +optional = false +python-versions = ">=3.8" +files = [ + {file = "blinker-1.8.2-py3-none-any.whl", hash = 
"sha256:1779309f71bf239144b9399d06ae925637cf6634cf6bd131104184531bf67c01"}, + {file = "blinker-1.8.2.tar.gz", hash = "sha256:8f77b09d3bf7c795e969e9486f39c2c5e9c39d4ee07424be2bc594ece9642d83"}, +] + [[package]] name = "bokeh" -version = "3.4.1" +version = "3.4.2" description = "Interactive plots and applications in the browser from Python" optional = false python-versions = ">=3.9" files = [ - {file = "bokeh-3.4.1-py3-none-any.whl", hash = "sha256:1e3c502a0a8205338fc74dadbfa321f8a0965441b39501e36796a47b4017b642"}, - {file = "bokeh-3.4.1.tar.gz", hash = "sha256:d824961e4265367b0750ce58b07e564ad0b83ca64b335521cd3421e9b9f10d89"}, + {file = "bokeh-3.4.2-py3-none-any.whl", hash = "sha256:931a43ee59dbf1720383ab904f8205e126b85561aac55592415b800c96f1b0eb"}, + {file = "bokeh-3.4.2.tar.gz", hash = "sha256:a16d5cc0abb93d2d270d70fc35851f3e1b9208814a985a4678e0ba5ef2d9cd42"}, ] [package.dependencies] @@ -228,17 +394,17 @@ xyzservices = ">=2021.09.1" [[package]] name = "boto3" -version = "1.34.106" +version = "1.34.131" description = "The AWS SDK for Python" optional = false python-versions = ">=3.8" files = [ - {file = "boto3-1.34.106-py3-none-any.whl", hash = "sha256:d3be4e1dd5d546a001cd4da805816934cbde9d395316546e9411fec341ade5cf"}, - {file = "boto3-1.34.106.tar.gz", hash = "sha256:6165b8cf1c7e625628ab28b32f9027064c8f5e5fca1c38d7fc228cd22069a19f"}, + {file = "boto3-1.34.131-py3-none-any.whl", hash = "sha256:05e388cb937e82be70bfd7eb0c84cf8011ff35cf582a593873ac21675268683b"}, + {file = "boto3-1.34.131.tar.gz", hash = "sha256:dab8f72a6c4e62b4fd70da09e08a6b2a65ea2115b27dd63737142005776ef216"}, ] [package.dependencies] -botocore = ">=1.34.106,<1.35.0" +botocore = ">=1.34.131,<1.35.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.10.0,<0.11.0" @@ -247,13 +413,13 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.34.106" +version = "1.34.131" description = "Low-level, data-driven core of boto 3." 
optional = false python-versions = ">=3.8" files = [ - {file = "botocore-1.34.106-py3-none-any.whl", hash = "sha256:4baf0e27c2dfc4f4d0dee7c217c716e0782f9b30e8e1fff983fce237d88f73ae"}, - {file = "botocore-1.34.106.tar.gz", hash = "sha256:921fa5202f88c3e58fdcb4b3acffd56d65b24bca47092ee4b27aa988556c0be6"}, + {file = "botocore-1.34.131-py3-none-any.whl", hash = "sha256:13b011d7b206ce00727dcee26548fa3b550db9046d5a0e90ac25a6e6c8fde6ef"}, + {file = "botocore-1.34.131.tar.gz", hash = "sha256:502ddafe1d627fcf1e4c007c86454e5dd011dba7c58bd8e8a5368a79f3e387dc"}, ] [package.dependencies] @@ -262,58 +428,57 @@ python-dateutil = ">=2.1,<3.0.0" urllib3 = {version = ">=1.25.4,<2.2.0 || >2.2.0,<3", markers = "python_version >= \"3.10\""} [package.extras] -crt = ["awscrt (==0.20.9)"] +crt = ["awscrt (==0.20.11)"] [[package]] name = "bottleneck" -version = "1.3.8" +version = "1.4.0" description = "Fast NumPy array functions written in C" optional = false python-versions = "*" files = [ - {file = "Bottleneck-1.3.8-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:865c8ed5b798c0198b0b80553e09cc0d890c4f5feb3d81d31661517ca7819fa3"}, - {file = "Bottleneck-1.3.8-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d073a31e259d40b25e29dbba80f73abf38afe98fd730c79dad7edd9a0ad6cff5"}, - {file = "Bottleneck-1.3.8-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b806b277ab47495032822f55f43b8d336e4b7e73f8506ed34d3ea3da6d644abc"}, - {file = "Bottleneck-1.3.8-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:770b517609916adeb39d3b1a386a29bc316da03dd61e7ee6e8a38325b80cc327"}, - {file = "Bottleneck-1.3.8-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:2948502b0394ee419945b55b092585222a505c61d41a874c741be49f2cac056f"}, - {file = "Bottleneck-1.3.8-cp310-cp310-win32.whl", hash = "sha256:271b6333522beb8aee32e640ba49a2064491d2c10317baa58a5996be3dd443e4"}, - {file = 
"Bottleneck-1.3.8-cp310-cp310-win_amd64.whl", hash = "sha256:d41000ea7ca196b5fd39d6fccd34bf0704c8831731cedd2da2dcae3c6ac49c42"}, - {file = "Bottleneck-1.3.8-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d0a7f454394cd3642498b6e077e70f4a6b9fd46a8eb908c83ac737fdc9f9a98c"}, - {file = "Bottleneck-1.3.8-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2c4ea8b9024dcb4e83b5c118a3c8faa863ace2ad572849da548a74a8ee4e8f2a"}, - {file = "Bottleneck-1.3.8-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f40724b6e965ff5b88b333d4a10097b1629e60c0db21bb3d08c24d7b1a904a16"}, - {file = "Bottleneck-1.3.8-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:4bd7183b8dcca89d0e65abe4507c19667dd31dacfbcc8ed705bad642f26a46e1"}, - {file = "Bottleneck-1.3.8-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:20aa31a7d9d747c499ace1610a6e1f7aba6e3d4a9923e0312f6b4b6d68a59af3"}, - {file = "Bottleneck-1.3.8-cp311-cp311-win32.whl", hash = "sha256:350520105d9449e6565b3f0c4ce1f80a0b3e4d63695ebbf29db41f62e13f6461"}, - {file = "Bottleneck-1.3.8-cp311-cp311-win_amd64.whl", hash = "sha256:167a278902775defde7dfded6e98e3707dfe54971ffd9aec25c43bc74e4e381a"}, - {file = "Bottleneck-1.3.8-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c6e93ed45c6c83392f73d0333b310b38772df7eb78c120c1447245691bdedaf4"}, - {file = "Bottleneck-1.3.8-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d3400f47dda0196b5af50b0b0678e33cc8c42e52e55ae0a63cdfed60725659bc"}, - {file = "Bottleneck-1.3.8-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fba5fd1805c71b2eeea50bea93d59be449c4af23ebd8da5f75fd74fd0331e314"}, - {file = "Bottleneck-1.3.8-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:60139c5c3d2a9c1454a04af5ee981a9f56548d27fa36f264069b149a6e9b01ed"}, - {file = 
"Bottleneck-1.3.8-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:99fab17fa26c811ccad63e208314726e718ae6605314329eca09641954550523"}, - {file = "Bottleneck-1.3.8-cp312-cp312-win32.whl", hash = "sha256:d3ae2bb5d4168912e438e377cc1301fa01df949ba59cd86317b3e00404fd4a97"}, - {file = "Bottleneck-1.3.8-cp312-cp312-win_amd64.whl", hash = "sha256:bcba1d5d5328c50f94852ab521fcb26f35d9e0ccd928d120d56455d1a5bb743f"}, - {file = "Bottleneck-1.3.8-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8d01fd5389d3160d54619119987ac24b020fa6810b7b398fff4945892237b3da"}, - {file = "Bottleneck-1.3.8-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7ca25f0003ef65264942f6306d793e0f270ece8b406c5a293dfc7d878146e9f8"}, - {file = "Bottleneck-1.3.8-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf7763cf1516fa388c3587d12182fc1bc1c8089eab1a0a1bf09761f4c41af73c"}, - {file = "Bottleneck-1.3.8-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:38837c022350e2a656453f0e448416b7108cf67baccf11d04a0b3b70a48074dd"}, - {file = "Bottleneck-1.3.8-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:84ca5e741fae1c1796744dbdd0d2c1789cb74dd79c12ea8ec5834f83430f8520"}, - {file = "Bottleneck-1.3.8-cp37-cp37m-win32.whl", hash = "sha256:f4dfc22a3450227e692ef2ff4657639c33eec88ad04ee3ce29d1a23a4942da24"}, - {file = "Bottleneck-1.3.8-cp37-cp37m-win_amd64.whl", hash = "sha256:90b87eed152bbd760c4eb11473c2cf036abdb26e2f84caeb00787da74fb08c40"}, - {file = "Bottleneck-1.3.8-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:54a1b5d9d63b2d9f2955f8542eea26c418f97873e0abf86ca52beea0208c9306"}, - {file = "Bottleneck-1.3.8-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:019dd142d1e870388fb0b649213a0d8e569cce784326e183deba8f17826edd9f"}, - {file = 
"Bottleneck-1.3.8-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b5ed34a540eb7df59f45da659af9f792306637de1c69c95f020294f3b9fc4a8"}, - {file = "Bottleneck-1.3.8-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:b69fcd4d818bcf9d53497d8accd0d5f852a447728baaa33b9b7168f8c4221d06"}, - {file = "Bottleneck-1.3.8-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:02616a830bd477f5ba51103396092da4b9d83cea2e88f5b8069e3f4f7b796704"}, - {file = "Bottleneck-1.3.8-cp38-cp38-win32.whl", hash = "sha256:93d359fb83eb3bdd6635ef6e64835c38ffdc211441fc190549f286e6af98b5f6"}, - {file = "Bottleneck-1.3.8-cp38-cp38-win_amd64.whl", hash = "sha256:51c8bb3dffeb72c14f0382b80de76eabac6726d316babbd48f7e4056267d7910"}, - {file = "Bottleneck-1.3.8-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:84453548b0f722c3be912ce3c6b685917fea842bf1252eeb63714a2c1fd1ffc9"}, - {file = "Bottleneck-1.3.8-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:92700867504a213cafa9b8d9be529bd6e18dc83366b2ba00e86e80769b93f678"}, - {file = "Bottleneck-1.3.8-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fadfd2f3931fdff42f4b9867eb02ed7c662d01e6099ff6b347b6ced791450651"}, - {file = "Bottleneck-1.3.8-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:cfbc4a3a934b677bfbc37ac8757c4e1264a76262b774259bd3fa8a265dbd668b"}, - {file = "Bottleneck-1.3.8-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:3c74c18f86a1ffac22280b005df8bb8a58505ac6663c4d6807f39873c17dc347"}, - {file = "Bottleneck-1.3.8-cp39-cp39-win32.whl", hash = "sha256:211f881159e8adb3a57df2263028ae6dc89ec4328bfd43f3421e507406c28654"}, - {file = "Bottleneck-1.3.8-cp39-cp39-win_amd64.whl", hash = "sha256:8615eeb75009ba7c0a112a5a6a5154ed3d61fd6b0879631778b3e42e2d9a6d65"}, - {file = "Bottleneck-1.3.8.tar.gz", hash = 
"sha256:6780d896969ba7f53c8995ba90c87c548beb3db435dc90c60b9a10ed1ab4d868"}, + {file = "Bottleneck-1.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2110af22aa8c2779faba8aa021d6b559df04449bdf21d510eacd7910934189fe"}, + {file = "Bottleneck-1.4.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:381cbd1e52338fcdf9ff01c962e6aa187b2d8b3b369d42e779b6d33ac61f8d35"}, + {file = "Bottleneck-1.4.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a91e40bbb8452e77772614d882be2c34b3b514d9f15460f703293525a6e173d"}, + {file = "Bottleneck-1.4.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:59604949aea476f5075b965129eaa3c2d90891fd43b0dfaf2ad7621bb5db14a5"}, + {file = "Bottleneck-1.4.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c2c92545e1bc8e859d8d137aefa3b24843bd374b17c9814dafa3bbcea9fc4ec0"}, + {file = "Bottleneck-1.4.0-cp310-cp310-win32.whl", hash = "sha256:f63e79bfa2f82a7432c8b147ed321d01ca7769bc17cc04644286a4ce58d30549"}, + {file = "Bottleneck-1.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:d69907d8d679cb5091a3f479c46bf1076f149f6311ff3298bac5089b86a2fab1"}, + {file = "Bottleneck-1.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:67347b0f01f32a232a6269c37afc1c079e08f6455fa12e91f4a1cd12eb0d11a5"}, + {file = "Bottleneck-1.4.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1490348b3bbc0225523dc2c00c6bb3e66168c537d62797bd29783c0826c09838"}, + {file = "Bottleneck-1.4.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a704165552496cbcc8bcc5921bb679fd6fa66bb1e758888de091b1223231c9f0"}, + {file = "Bottleneck-1.4.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:ffb4e4edf7997069719b9269926cc00a2a12c6e015422d1ebc2f621c4541396a"}, + {file = "Bottleneck-1.4.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = 
"sha256:5d6bf45ed58d5e7414c0011ef2da75474fe597a51970df83596b0bcb79c14c5e"}, + {file = "Bottleneck-1.4.0-cp311-cp311-win32.whl", hash = "sha256:ed209f8f3cb9954773764b0fa2510a7a9247ad245593187ac90bd0747771bc5c"}, + {file = "Bottleneck-1.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:d53f1a72b12cfd76b56934c33bc0cb7c1a295f23a2d3ffba8c764514c9b5e0ff"}, + {file = "Bottleneck-1.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e720ff24370324c84a82b1a18195274715c23181748b2b9e3dacad24198ca06f"}, + {file = "Bottleneck-1.4.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:44305c70c2a1539b0ae968e033f301ad868a6146b47e3cccd73fdfe3fc07c4ee"}, + {file = "Bottleneck-1.4.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1b4dac5d2a871b7bd296c2b92426daa27d5b07aa84ef2557db097d29135da4eb"}, + {file = "Bottleneck-1.4.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:fbcdd01db9e27741fb16a02b720cf02389d4b0b99cefe3c834c7df88c2d7412d"}, + {file = "Bottleneck-1.4.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:14b3334a39308fbb05dacd35ac100842aa9e9bc70afbdcebe43e46179d183fd0"}, + {file = "Bottleneck-1.4.0-cp312-cp312-win32.whl", hash = "sha256:520d7a83cd48b3f58e5df1a258acb547f8a5386a8c21ca9e1058d83a0d622fdf"}, + {file = "Bottleneck-1.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:b1339b9ad3ee217253f246cde5c3789eb527cf9dd31ff0a1f5a8bf7fc89eadad"}, + {file = "Bottleneck-1.4.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f2749602200aaa0e12a0f3f936dd6d4035384ad10d3acf7ac4f418c501683397"}, + {file = "Bottleneck-1.4.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bb79a2ac135567694f13339f0bebcee96aec09c596b324b61cd7fd5e306f49d"}, + {file = "Bottleneck-1.4.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = 
"sha256:c6097bf39723e76ff5bba160daab92ae599df212c859db8d46648548584d04a8"}, + {file = "Bottleneck-1.4.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:b5f72b66ccc0272de46b67346cf8490737ba2adc6a302664f5326e7741b6d5ab"}, + {file = "Bottleneck-1.4.0-cp37-cp37m-win32.whl", hash = "sha256:9903f017b9d6f2f69ce241b424ddad7265624f64dc6eafbe257d45661febf8bd"}, + {file = "Bottleneck-1.4.0-cp37-cp37m-win_amd64.whl", hash = "sha256:834816c316ad184cae7ecb615b69876a42cd2cafb07ee66c57a9c1ccacb63339"}, + {file = "Bottleneck-1.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:03c43150f180d86a5633a6da788660d335983f6798fca306ba7f47ff27a1b7e7"}, + {file = "Bottleneck-1.4.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eea333dbcadb780356c54f5c4fa7754f143573b57508fff43d5daf63298eb26a"}, + {file = "Bottleneck-1.4.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6179791c0119aec3708ef74ddadab8d183e3742adb93a9028718e8696bdf572b"}, + {file = "Bottleneck-1.4.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:220b72405f77aebb0137b733b464c2526ded471e4289ac1e840bab8852759a55"}, + {file = "Bottleneck-1.4.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8746f0f727997ce4c7457dc1fec4e4e3c0fdd8803514baa3d1c4ea6515ab04b2"}, + {file = "Bottleneck-1.4.0-cp38-cp38-win32.whl", hash = "sha256:6a36280ee33d9db799163f04e88b950261e590cc71d089f5e179b21680b5d491"}, + {file = "Bottleneck-1.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:de17e012694e6a987bb4eb050dd7f0cf939195a8e00cb23aa93ebee5fd5e64a8"}, + {file = "Bottleneck-1.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:28260197ab8a4a6b7adf810523147b1a3e85607f4e26a0f685eb9d155cfc75af"}, + {file = "Bottleneck-1.4.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:90d5d188a0cca0b9655ff2904ee61e7f183079e97550be98c2541a2eec358a72"}, + {file = 
"Bottleneck-1.4.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2861ff645d236f1a6f5c6d1ddb3db37d19af1d91057bdc4fd7b76299a15b3079"}, + {file = "Bottleneck-1.4.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:6136ce7dcf825c432a20b80ab1c460264a437d8430fff32536176147e0b6b832"}, + {file = "Bottleneck-1.4.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:889e6855b77345622b4ba927335d3118745d590492941f5f78554f157d259e92"}, + {file = "Bottleneck-1.4.0-cp39-cp39-win32.whl", hash = "sha256:817aa43a671ede696ea023d8f35839a391244662340cc95a0f46965dda8b35cf"}, + {file = "Bottleneck-1.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:23834d82177d6997f21fa63156550668cd07a9a6e5a1b66ea80f1a14ac6ffd07"}, + {file = "bottleneck-1.4.0.tar.gz", hash = "sha256:beb36df519b8709e7d357c0c9639b03b885ca6355bbf5e53752c685de51605b8"}, ] [package.dependencies] @@ -333,6 +498,70 @@ files = [ {file = "certifi-2024.6.2.tar.gz", hash = "sha256:3cd43f1c6fa7dedc5899d69d3ad0398fd018ad1a17fba83ddaf78aa46c747516"}, ] +[[package]] +name = "cffi" +version = "1.16.0" +description = "Foreign Function Interface for Python calling C code." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "cffi-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6b3d6606d369fc1da4fd8c357d026317fbb9c9b75d36dc16e90e84c26854b088"}, + {file = "cffi-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ac0f5edd2360eea2f1daa9e26a41db02dd4b0451b48f7c318e217ee092a213e9"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7e61e3e4fa664a8588aa25c883eab612a188c725755afff6289454d6362b9673"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a72e8961a86d19bdb45851d8f1f08b041ea37d2bd8d4fd19903bc3083d80c896"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5b50bf3f55561dac5438f8e70bfcdfd74543fd60df5fa5f62d94e5867deca684"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7651c50c8c5ef7bdb41108b7b8c5a83013bfaa8a935590c5d74627c047a583c7"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4108df7fe9b707191e55f33efbcb2d81928e10cea45527879a4749cbe472614"}, + {file = "cffi-1.16.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:32c68ef735dbe5857c810328cb2481e24722a59a2003018885514d4c09af9743"}, + {file = "cffi-1.16.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:673739cb539f8cdaa07d92d02efa93c9ccf87e345b9a0b556e3ecc666718468d"}, + {file = "cffi-1.16.0-cp310-cp310-win32.whl", hash = "sha256:9f90389693731ff1f659e55c7d1640e2ec43ff725cc61b04b2f9c6d8d017df6a"}, + {file = "cffi-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:e6024675e67af929088fda399b2094574609396b1decb609c55fa58b028a32a1"}, + {file = "cffi-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b84834d0cf97e7d27dd5b7f3aca7b6e9263c56308ab9dc8aae9784abb774d404"}, + {file = "cffi-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = 
"sha256:1b8ebc27c014c59692bb2664c7d13ce7a6e9a629be20e54e7271fa696ff2b417"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ee07e47c12890ef248766a6e55bd38ebfb2bb8edd4142d56db91b21ea68b7627"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8a9d3ebe49f084ad71f9269834ceccbf398253c9fac910c4fd7053ff1386936"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e70f54f1796669ef691ca07d046cd81a29cb4deb1e5f942003f401c0c4a2695d"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5bf44d66cdf9e893637896c7faa22298baebcd18d1ddb6d2626a6e39793a1d56"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7b78010e7b97fef4bee1e896df8a4bbb6712b7f05b7ef630f9d1da00f6444d2e"}, + {file = "cffi-1.16.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c6a164aa47843fb1b01e941d385aab7215563bb8816d80ff3a363a9f8448a8dc"}, + {file = "cffi-1.16.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e09f3ff613345df5e8c3667da1d918f9149bd623cd9070c983c013792a9a62eb"}, + {file = "cffi-1.16.0-cp311-cp311-win32.whl", hash = "sha256:2c56b361916f390cd758a57f2e16233eb4f64bcbeee88a4881ea90fca14dc6ab"}, + {file = "cffi-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:db8e577c19c0fda0beb7e0d4e09e0ba74b1e4c092e0e40bfa12fe05b6f6d75ba"}, + {file = "cffi-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:fa3a0128b152627161ce47201262d3140edb5a5c3da88d73a1b790a959126956"}, + {file = "cffi-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:68e7c44931cc171c54ccb702482e9fc723192e88d25a0e133edd7aff8fcd1f6e"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:abd808f9c129ba2beda4cfc53bde801e5bcf9d6e0f22f095e45327c038bfe68e"}, + {file = 
"cffi-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88e2b3c14bdb32e440be531ade29d3c50a1a59cd4e51b1dd8b0865c54ea5d2e2"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fcc8eb6d5902bb1cf6dc4f187ee3ea80a1eba0a89aba40a5cb20a5087d961357"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b7be2d771cdba2942e13215c4e340bfd76398e9227ad10402a8767ab1865d2e6"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e715596e683d2ce000574bae5d07bd522c781a822866c20495e52520564f0969"}, + {file = "cffi-1.16.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2d92b25dbf6cae33f65005baf472d2c245c050b1ce709cc4588cdcdd5495b520"}, + {file = "cffi-1.16.0-cp312-cp312-win32.whl", hash = "sha256:b2ca4e77f9f47c55c194982e10f058db063937845bb2b7a86c84a6cfe0aefa8b"}, + {file = "cffi-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:68678abf380b42ce21a5f2abde8efee05c114c2fdb2e9eef2efdb0257fba1235"}, + {file = "cffi-1.16.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0c9ef6ff37e974b73c25eecc13952c55bceed9112be2d9d938ded8e856138bcc"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a09582f178759ee8128d9270cd1344154fd473bb77d94ce0aeb2a93ebf0feaf0"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e760191dd42581e023a68b758769e2da259b5d52e3103c6060ddc02c9edb8d7b"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:80876338e19c951fdfed6198e70bc88f1c9758b94578d5a7c4c91a87af3cf31c"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a6a14b17d7e17fa0d207ac08642c8820f84f25ce17a442fd15e27ea18d67c59b"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:6602bc8dc6f3a9e02b6c22c4fc1e47aa50f8f8e6d3f78a5e16ac33ef5fefa324"}, + {file = "cffi-1.16.0-cp38-cp38-win32.whl", hash = "sha256:131fd094d1065b19540c3d72594260f118b231090295d8c34e19a7bbcf2e860a"}, + {file = "cffi-1.16.0-cp38-cp38-win_amd64.whl", hash = "sha256:31d13b0f99e0836b7ff893d37af07366ebc90b678b6664c955b54561fc36ef36"}, + {file = "cffi-1.16.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:582215a0e9adbe0e379761260553ba11c58943e4bbe9c36430c4ca6ac74b15ed"}, + {file = "cffi-1.16.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b29ebffcf550f9da55bec9e02ad430c992a87e5f512cd63388abb76f1036d8d2"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dc9b18bf40cc75f66f40a7379f6a9513244fe33c0e8aa72e2d56b0196a7ef872"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9cb4a35b3642fc5c005a6755a5d17c6c8b6bcb6981baf81cea8bfbc8903e8ba8"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b86851a328eedc692acf81fb05444bdf1891747c25af7529e39ddafaf68a4f3f"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c0f31130ebc2d37cdd8e44605fb5fa7ad59049298b3f745c74fa74c62fbfcfc4"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f8e709127c6c77446a8c0a8c8bf3c8ee706a06cd44b1e827c3e6a2ee6b8c098"}, + {file = "cffi-1.16.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:748dcd1e3d3d7cd5443ef03ce8685043294ad6bd7c02a38d1bd367cfd968e000"}, + {file = "cffi-1.16.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8895613bcc094d4a1b2dbe179d88d7fb4a15cee43c052e8885783fac397d91fe"}, + {file = "cffi-1.16.0-cp39-cp39-win32.whl", hash = "sha256:ed86a35631f7bfbb28e108dd96773b9d5a6ce4811cf6ea468bb6a359b256b1e4"}, + {file = "cffi-1.16.0-cp39-cp39-win_amd64.whl", hash = 
"sha256:3686dffb02459559c74dd3d81748269ffb0eb027c39a6fc99502de37d501faa8"}, + {file = "cffi-1.16.0.tar.gz", hash = "sha256:bcb3ef43e58665bbda2fb198698fcae6776483e0c4a631aa5647806c25e02cc0"}, +] + +[package.dependencies] +pycparser = "*" + [[package]] name = "cfgv" version = "3.4.0" @@ -344,6 +573,32 @@ files = [ {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"}, ] +[[package]] +name = "cfn-lint" +version = "1.5.0" +description = "Checks CloudFormation templates for practices and behaviour that could potentially be improved" +optional = false +python-versions = ">=3.8" +files = [ + {file = "cfn_lint-1.5.0-py3-none-any.whl", hash = "sha256:46ecae4dc6dc0eedf7a4169fbe24d893ff3125c3b53d05d3cab196f704709731"}, + {file = "cfn_lint-1.5.0.tar.gz", hash = "sha256:7fdcf1b1393ace49d50f7e8f047f90811a1c463a1cd57489d4781a31f205a8d0"}, +] + +[package.dependencies] +aws-sam-translator = ">=1.89.0" +jsonpatch = "*" +networkx = ">=2.4,<4" +pyyaml = ">5.4" +regex = "*" +sympy = ">=1.0.0" +typing-extensions = "*" + +[package.extras] +full = ["jschema-to-python (>=1.2.3,<1.3.0)", "junit-xml (>=1.9,<2.0)", "pydot", "sarif-om (>=1.0.4,<1.1.0)"] +graph = ["pydot"] +junit = ["junit-xml (>=1.9,<2.0)"] +sarif = ["jschema-to-python (>=1.2.3,<1.3.0)", "sarif-om (>=1.0.4,<1.1.0)"] + [[package]] name = "cftime" version = "1.6.4" @@ -380,6 +635,7 @@ files = [ {file = "cftime-1.6.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:25f043703e785de0bd7cd8222c0a53317e9aeb3dfc062588b05e6f3ebb007468"}, {file = "cftime-1.6.4-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:f9acc272df1022f24fe7dbe9de43fa5d8271985161df14549e4d8d28c90dc9ea"}, {file = "cftime-1.6.4-cp39-cp39-win_amd64.whl", hash = "sha256:e8467b6fbf8dbfe0be8c04d61180765fdd3b9ab0fe51313a0bbf87e63634a3d8"}, + {file = "cftime-1.6.4.tar.gz", hash = "sha256:e325406193758a7ed67308deb52e727782a19e384e183378e7ff62098be0aedc"}, ] [package.dependencies] @@ -499,49 
+755,57 @@ files = [ colorama = {version = "*", markers = "platform_system == \"Windows\""} [[package]] -name = "click-plugins" -version = "1.1.1" -description = "An extension module for click to enable registering CLI commands via setuptools entry-points." +name = "cloudpickle" +version = "3.0.0" +description = "Pickler class to extend the standard pickle.Pickler functionality" optional = false -python-versions = "*" +python-versions = ">=3.8" files = [ - {file = "click-plugins-1.1.1.tar.gz", hash = "sha256:46ab999744a9d831159c3411bb0c79346d94a444df9a3a3742e9ed63645f264b"}, - {file = "click_plugins-1.1.1-py2.py3-none-any.whl", hash = "sha256:5d262006d3222f5057fd81e1623d4443e41dcda5dc815c06b442aa3c02889fc8"}, + {file = "cloudpickle-3.0.0-py3-none-any.whl", hash = "sha256:246ee7d0c295602a036e86369c77fecda4ab17b506496730f2f576d9016fd9c7"}, + {file = "cloudpickle-3.0.0.tar.gz", hash = "sha256:996d9a482c6fb4f33c1a35335cf8afd065d2a56e973270364840712d9131a882"}, ] -[package.dependencies] -click = ">=4.0" - -[package.extras] -dev = ["coveralls", "pytest (>=3.6)", "pytest-cov", "wheel"] - [[package]] -name = "cligj" -version = "0.7.2" -description = "Click params for commmand line interfaces to GeoJSON" +name = "coiled" +version = "1.34.0" +description = "Python client for coiled.io dask clusters" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, <4" +python-versions = ">=3.8" files = [ - {file = "cligj-0.7.2-py3-none-any.whl", hash = "sha256:c1ca117dbce1fe20a5809dc96f01e1c2840f6dcc939b3ddbb1111bf330ba82df"}, - {file = "cligj-0.7.2.tar.gz", hash = "sha256:a4bc13d623356b373c2c27c53dbd9c68cae5d526270bfa71f6c6fa69669c6b27"}, + {file = "coiled-1.34.0-py3-none-any.whl", hash = "sha256:fc628b3cba6adf86ed23a3cad0abcd4b3975b7e54b4a9288ff4eb395094c8938"}, + {file = "coiled-1.34.0.tar.gz", hash = "sha256:cbfbb3fafe8a6a7b2547e4a4d91404d5aeeb65d4d39dc5dd7db0df8b882a29f9"}, ] [package.dependencies] -click = ">=4.0" +aiohttp = "*" +backoff = ">=1.11.0" +boto3 = "*" 
+click = ">=7.1" +dask = ">=2022.02.1" +distributed = ">=2022.02.1" +fabric = ">=3.0" +filelock = "*" +gilknocker = ">=0.4.1" +httpx = {version = ">=0.15", extras = ["http2"]} +importlib-metadata = "*" +invoke = ">=2.0" +ipywidgets = "*" +jmespath = "*" +jsondiff = "*" +packaging = "*" +paramiko = ">=2.4" +pip = ">=19.3" +pip-requirements-parser = "*" +prometheus-client = "*" +rich = ">=11.2.0" +setuptools = ">=49.3.0" +toml = "*" +typing-extensions = "*" +wheel = "*" [package.extras] -test = ["pytest-cov"] - -[[package]] -name = "cloudpickle" -version = "3.0.0" -description = "Pickler class to extend the standard pickle.Pickler functionality" -optional = false -python-versions = ">=3.8" -files = [ - {file = "cloudpickle-3.0.0-py3-none-any.whl", hash = "sha256:246ee7d0c295602a036e86369c77fecda4ab17b506496730f2f576d9016fd9c7"}, - {file = "cloudpickle-3.0.0.tar.gz", hash = "sha256:996d9a482c6fb4f33c1a35335cf8afd065d2a56e973270364840712d9131a882"}, -] +azure = ["azure-identity", "azure-mgmt-resource", "azure-mgmt-subscription"] +notebook = ["distributed (>=2022.8.1)", "jupyter-server-proxy", "jupyterlab"] [[package]] name = "colorama" @@ -554,6 +818,23 @@ files = [ {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +[[package]] +name = "comm" +version = "0.2.2" +description = "Jupyter Python Comm implementation, for usage in ipykernel, xeus-python etc." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "comm-0.2.2-py3-none-any.whl", hash = "sha256:e6fb86cb70ff661ee8c9c14e7d36d6de3b4066f1441be4063df9c5009f0a64d3"}, + {file = "comm-0.2.2.tar.gz", hash = "sha256:3fd7a84065306e07bea1773df6eb8282de51ba82f77c72f9c85716ab11fe980e"}, +] + +[package.dependencies] +traitlets = ">=4" + +[package.extras] +test = ["pytest"] + [[package]] name = "contourpy" version = "1.2.1" @@ -619,63 +900,63 @@ test-no-images = ["pytest", "pytest-cov", "pytest-xdist", "wurlitzer"] [[package]] name = "coverage" -version = "7.5.3" +version = "7.5.4" description = "Code coverage measurement for Python" optional = false python-versions = ">=3.8" files = [ - {file = "coverage-7.5.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a6519d917abb15e12380406d721e37613e2a67d166f9fb7e5a8ce0375744cd45"}, - {file = "coverage-7.5.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:aea7da970f1feccf48be7335f8b2ca64baf9b589d79e05b9397a06696ce1a1ec"}, - {file = "coverage-7.5.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:923b7b1c717bd0f0f92d862d1ff51d9b2b55dbbd133e05680204465f454bb286"}, - {file = "coverage-7.5.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:62bda40da1e68898186f274f832ef3e759ce929da9a9fd9fcf265956de269dbc"}, - {file = "coverage-7.5.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d8b7339180d00de83e930358223c617cc343dd08e1aa5ec7b06c3a121aec4e1d"}, - {file = "coverage-7.5.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:25a5caf742c6195e08002d3b6c2dd6947e50efc5fc2c2205f61ecb47592d2d83"}, - {file = "coverage-7.5.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:05ac5f60faa0c704c0f7e6a5cbfd6f02101ed05e0aee4d2822637a9e672c998d"}, - {file = "coverage-7.5.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = 
"sha256:239a4e75e09c2b12ea478d28815acf83334d32e722e7433471fbf641c606344c"}, - {file = "coverage-7.5.3-cp310-cp310-win32.whl", hash = "sha256:a5812840d1d00eafae6585aba38021f90a705a25b8216ec7f66aebe5b619fb84"}, - {file = "coverage-7.5.3-cp310-cp310-win_amd64.whl", hash = "sha256:33ca90a0eb29225f195e30684ba4a6db05dbef03c2ccd50b9077714c48153cac"}, - {file = "coverage-7.5.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f81bc26d609bf0fbc622c7122ba6307993c83c795d2d6f6f6fd8c000a770d974"}, - {file = "coverage-7.5.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7cec2af81f9e7569280822be68bd57e51b86d42e59ea30d10ebdbb22d2cb7232"}, - {file = "coverage-7.5.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55f689f846661e3f26efa535071775d0483388a1ccfab899df72924805e9e7cd"}, - {file = "coverage-7.5.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:50084d3516aa263791198913a17354bd1dc627d3c1639209640b9cac3fef5807"}, - {file = "coverage-7.5.3-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:341dd8f61c26337c37988345ca5c8ccabeff33093a26953a1ac72e7d0103c4fb"}, - {file = "coverage-7.5.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ab0b028165eea880af12f66086694768f2c3139b2c31ad5e032c8edbafca6ffc"}, - {file = "coverage-7.5.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:5bc5a8c87714b0c67cfeb4c7caa82b2d71e8864d1a46aa990b5588fa953673b8"}, - {file = "coverage-7.5.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:38a3b98dae8a7c9057bd91fbf3415c05e700a5114c5f1b5b0ea5f8f429ba6614"}, - {file = "coverage-7.5.3-cp311-cp311-win32.whl", hash = "sha256:fcf7d1d6f5da887ca04302db8e0e0cf56ce9a5e05f202720e49b3e8157ddb9a9"}, - {file = "coverage-7.5.3-cp311-cp311-win_amd64.whl", hash = "sha256:8c836309931839cca658a78a888dab9676b5c988d0dd34ca247f5f3e679f4e7a"}, - {file = "coverage-7.5.3-cp312-cp312-macosx_10_9_x86_64.whl", hash 
= "sha256:296a7d9bbc598e8744c00f7a6cecf1da9b30ae9ad51c566291ff1314e6cbbed8"}, - {file = "coverage-7.5.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:34d6d21d8795a97b14d503dcaf74226ae51eb1f2bd41015d3ef332a24d0a17b3"}, - {file = "coverage-7.5.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e317953bb4c074c06c798a11dbdd2cf9979dbcaa8ccc0fa4701d80042d4ebf1"}, - {file = "coverage-7.5.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:705f3d7c2b098c40f5b81790a5fedb274113373d4d1a69e65f8b68b0cc26f6db"}, - {file = "coverage-7.5.3-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1196e13c45e327d6cd0b6e471530a1882f1017eb83c6229fc613cd1a11b53cd"}, - {file = "coverage-7.5.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:015eddc5ccd5364dcb902eaecf9515636806fa1e0d5bef5769d06d0f31b54523"}, - {file = "coverage-7.5.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:fd27d8b49e574e50caa65196d908f80e4dff64d7e592d0c59788b45aad7e8b35"}, - {file = "coverage-7.5.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:33fc65740267222fc02975c061eb7167185fef4cc8f2770267ee8bf7d6a42f84"}, - {file = "coverage-7.5.3-cp312-cp312-win32.whl", hash = "sha256:7b2a19e13dfb5c8e145c7a6ea959485ee8e2204699903c88c7d25283584bfc08"}, - {file = "coverage-7.5.3-cp312-cp312-win_amd64.whl", hash = "sha256:0bbddc54bbacfc09b3edaec644d4ac90c08ee8ed4844b0f86227dcda2d428fcb"}, - {file = "coverage-7.5.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f78300789a708ac1f17e134593f577407d52d0417305435b134805c4fb135adb"}, - {file = "coverage-7.5.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b368e1aee1b9b75757942d44d7598dcd22a9dbb126affcbba82d15917f0cc155"}, - {file = "coverage-7.5.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f836c174c3a7f639bded48ec913f348c4761cbf49de4a20a956d3431a7c9cb24"}, - {file = 
"coverage-7.5.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:244f509f126dc71369393ce5fea17c0592c40ee44e607b6d855e9c4ac57aac98"}, - {file = "coverage-7.5.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c4c2872b3c91f9baa836147ca33650dc5c172e9273c808c3c3199c75490e709d"}, - {file = "coverage-7.5.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:dd4b3355b01273a56b20c219e74e7549e14370b31a4ffe42706a8cda91f19f6d"}, - {file = "coverage-7.5.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:f542287b1489c7a860d43a7d8883e27ca62ab84ca53c965d11dac1d3a1fab7ce"}, - {file = "coverage-7.5.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:75e3f4e86804023e991096b29e147e635f5e2568f77883a1e6eed74512659ab0"}, - {file = "coverage-7.5.3-cp38-cp38-win32.whl", hash = "sha256:c59d2ad092dc0551d9f79d9d44d005c945ba95832a6798f98f9216ede3d5f485"}, - {file = "coverage-7.5.3-cp38-cp38-win_amd64.whl", hash = "sha256:fa21a04112c59ad54f69d80e376f7f9d0f5f9123ab87ecd18fbb9ec3a2beed56"}, - {file = "coverage-7.5.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f5102a92855d518b0996eb197772f5ac2a527c0ec617124ad5242a3af5e25f85"}, - {file = "coverage-7.5.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d1da0a2e3b37b745a2b2a678a4c796462cf753aebf94edcc87dcc6b8641eae31"}, - {file = "coverage-7.5.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8383a6c8cefba1b7cecc0149415046b6fc38836295bc4c84e820872eb5478b3d"}, - {file = "coverage-7.5.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9aad68c3f2566dfae84bf46295a79e79d904e1c21ccfc66de88cd446f8686341"}, - {file = "coverage-7.5.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e079c9ec772fedbade9d7ebc36202a1d9ef7291bc9b3a024ca395c4d52853d7"}, - {file = 
"coverage-7.5.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bde997cac85fcac227b27d4fb2c7608a2c5f6558469b0eb704c5726ae49e1c52"}, - {file = "coverage-7.5.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:990fb20b32990b2ce2c5f974c3e738c9358b2735bc05075d50a6f36721b8f303"}, - {file = "coverage-7.5.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:3d5a67f0da401e105753d474369ab034c7bae51a4c31c77d94030d59e41df5bd"}, - {file = "coverage-7.5.3-cp39-cp39-win32.whl", hash = "sha256:e08c470c2eb01977d221fd87495b44867a56d4d594f43739a8028f8646a51e0d"}, - {file = "coverage-7.5.3-cp39-cp39-win_amd64.whl", hash = "sha256:1d2a830ade66d3563bb61d1e3c77c8def97b30ed91e166c67d0632c018f380f0"}, - {file = "coverage-7.5.3-pp38.pp39.pp310-none-any.whl", hash = "sha256:3538d8fb1ee9bdd2e2692b3b18c22bb1c19ffbefd06880f5ac496e42d7bb3884"}, - {file = "coverage-7.5.3.tar.gz", hash = "sha256:04aefca5190d1dc7a53a4c1a5a7f8568811306d7a8ee231c42fb69215571944f"}, + {file = "coverage-7.5.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6cfb5a4f556bb51aba274588200a46e4dd6b505fb1a5f8c5ae408222eb416f99"}, + {file = "coverage-7.5.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2174e7c23e0a454ffe12267a10732c273243b4f2d50d07544a91198f05c48f47"}, + {file = "coverage-7.5.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2214ee920787d85db1b6a0bd9da5f8503ccc8fcd5814d90796c2f2493a2f4d2e"}, + {file = "coverage-7.5.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1137f46adb28e3813dec8c01fefadcb8c614f33576f672962e323b5128d9a68d"}, + {file = "coverage-7.5.4-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b385d49609f8e9efc885790a5a0e89f2e3ae042cdf12958b6034cc442de428d3"}, + {file = "coverage-7.5.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b4a474f799456e0eb46d78ab07303286a84a3140e9700b9e154cfebc8f527016"}, + {file = 
"coverage-7.5.4-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:5cd64adedf3be66f8ccee418473c2916492d53cbafbfcff851cbec5a8454b136"}, + {file = "coverage-7.5.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:e564c2cf45d2f44a9da56f4e3a26b2236504a496eb4cb0ca7221cd4cc7a9aca9"}, + {file = "coverage-7.5.4-cp310-cp310-win32.whl", hash = "sha256:7076b4b3a5f6d2b5d7f1185fde25b1e54eb66e647a1dfef0e2c2bfaf9b4c88c8"}, + {file = "coverage-7.5.4-cp310-cp310-win_amd64.whl", hash = "sha256:018a12985185038a5b2bcafab04ab833a9a0f2c59995b3cec07e10074c78635f"}, + {file = "coverage-7.5.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:db14f552ac38f10758ad14dd7b983dbab424e731588d300c7db25b6f89e335b5"}, + {file = "coverage-7.5.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3257fdd8e574805f27bb5342b77bc65578e98cbc004a92232106344053f319ba"}, + {file = "coverage-7.5.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3a6612c99081d8d6134005b1354191e103ec9705d7ba2754e848211ac8cacc6b"}, + {file = "coverage-7.5.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d45d3cbd94159c468b9b8c5a556e3f6b81a8d1af2a92b77320e887c3e7a5d080"}, + {file = "coverage-7.5.4-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ed550e7442f278af76d9d65af48069f1fb84c9f745ae249c1a183c1e9d1b025c"}, + {file = "coverage-7.5.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7a892be37ca35eb5019ec85402c3371b0f7cda5ab5056023a7f13da0961e60da"}, + {file = "coverage-7.5.4-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8192794d120167e2a64721d88dbd688584675e86e15d0569599257566dec9bf0"}, + {file = "coverage-7.5.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:820bc841faa502e727a48311948e0461132a9c8baa42f6b2b84a29ced24cc078"}, + {file = "coverage-7.5.4-cp311-cp311-win32.whl", hash = 
"sha256:6aae5cce399a0f065da65c7bb1e8abd5c7a3043da9dceb429ebe1b289bc07806"}, + {file = "coverage-7.5.4-cp311-cp311-win_amd64.whl", hash = "sha256:d2e344d6adc8ef81c5a233d3a57b3c7d5181f40e79e05e1c143da143ccb6377d"}, + {file = "coverage-7.5.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:54317c2b806354cbb2dc7ac27e2b93f97096912cc16b18289c5d4e44fc663233"}, + {file = "coverage-7.5.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:042183de01f8b6d531e10c197f7f0315a61e8d805ab29c5f7b51a01d62782747"}, + {file = "coverage-7.5.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a6bb74ed465d5fb204b2ec41d79bcd28afccf817de721e8a807d5141c3426638"}, + {file = "coverage-7.5.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b3d45ff86efb129c599a3b287ae2e44c1e281ae0f9a9bad0edc202179bcc3a2e"}, + {file = "coverage-7.5.4-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5013ed890dc917cef2c9f765c4c6a8ae9df983cd60dbb635df8ed9f4ebc9f555"}, + {file = "coverage-7.5.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1014fbf665fef86cdfd6cb5b7371496ce35e4d2a00cda501cf9f5b9e6fced69f"}, + {file = "coverage-7.5.4-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:3684bc2ff328f935981847082ba4fdc950d58906a40eafa93510d1b54c08a66c"}, + {file = "coverage-7.5.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:581ea96f92bf71a5ec0974001f900db495488434a6928a2ca7f01eee20c23805"}, + {file = "coverage-7.5.4-cp312-cp312-win32.whl", hash = "sha256:73ca8fbc5bc622e54627314c1a6f1dfdd8db69788f3443e752c215f29fa87a0b"}, + {file = "coverage-7.5.4-cp312-cp312-win_amd64.whl", hash = "sha256:cef4649ec906ea7ea5e9e796e68b987f83fa9a718514fe147f538cfeda76d7a7"}, + {file = "coverage-7.5.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:cdd31315fc20868c194130de9ee6bfd99755cc9565edff98ecc12585b90be882"}, + {file = 
"coverage-7.5.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:02ff6e898197cc1e9fa375581382b72498eb2e6d5fc0b53f03e496cfee3fac6d"}, + {file = "coverage-7.5.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d05c16cf4b4c2fc880cb12ba4c9b526e9e5d5bb1d81313d4d732a5b9fe2b9d53"}, + {file = "coverage-7.5.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c5986ee7ea0795a4095ac4d113cbb3448601efca7f158ec7f7087a6c705304e4"}, + {file = "coverage-7.5.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5df54843b88901fdc2f598ac06737f03d71168fd1175728054c8f5a2739ac3e4"}, + {file = "coverage-7.5.4-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:ab73b35e8d109bffbda9a3e91c64e29fe26e03e49addf5b43d85fc426dde11f9"}, + {file = "coverage-7.5.4-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:aea072a941b033813f5e4814541fc265a5c12ed9720daef11ca516aeacd3bd7f"}, + {file = "coverage-7.5.4-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:16852febd96acd953b0d55fc842ce2dac1710f26729b31c80b940b9afcd9896f"}, + {file = "coverage-7.5.4-cp38-cp38-win32.whl", hash = "sha256:8f894208794b164e6bd4bba61fc98bf6b06be4d390cf2daacfa6eca0a6d2bb4f"}, + {file = "coverage-7.5.4-cp38-cp38-win_amd64.whl", hash = "sha256:e2afe743289273209c992075a5a4913e8d007d569a406ffed0bd080ea02b0633"}, + {file = "coverage-7.5.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b95c3a8cb0463ba9f77383d0fa8c9194cf91f64445a63fc26fb2327e1e1eb088"}, + {file = "coverage-7.5.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3d7564cc09dd91b5a6001754a5b3c6ecc4aba6323baf33a12bd751036c998be4"}, + {file = "coverage-7.5.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:44da56a2589b684813f86d07597fdf8a9c6ce77f58976727329272f5a01f99f7"}, + {file = "coverage-7.5.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:e16f3d6b491c48c5ae726308e6ab1e18ee830b4cdd6913f2d7f77354b33f91c8"}, + {file = "coverage-7.5.4-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dbc5958cb471e5a5af41b0ddaea96a37e74ed289535e8deca404811f6cb0bc3d"}, + {file = "coverage-7.5.4-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:a04e990a2a41740b02d6182b498ee9796cf60eefe40cf859b016650147908029"}, + {file = "coverage-7.5.4-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:ddbd2f9713a79e8e7242d7c51f1929611e991d855f414ca9996c20e44a895f7c"}, + {file = "coverage-7.5.4-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:b1ccf5e728ccf83acd313c89f07c22d70d6c375a9c6f339233dcf792094bcbf7"}, + {file = "coverage-7.5.4-cp39-cp39-win32.whl", hash = "sha256:56b4eafa21c6c175b3ede004ca12c653a88b6f922494b023aeb1e836df953ace"}, + {file = "coverage-7.5.4-cp39-cp39-win_amd64.whl", hash = "sha256:65e528e2e921ba8fd67d9055e6b9f9e34b21ebd6768ae1c1723f4ea6ace1234d"}, + {file = "coverage-7.5.4-pp38.pp39.pp310-none-any.whl", hash = "sha256:79b356f3dd5b26f3ad23b35c75dbdaf1f9e2450b6bcefc6d0825ea0aa3f86ca5"}, + {file = "coverage-7.5.4.tar.gz", hash = "sha256:a44963520b069e12789d0faea4e9fdb1e410cdc4aab89d94f7f55cbb7fef0353"}, ] [package.dependencies] @@ -684,6 +965,60 @@ tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.1 [package.extras] toml = ["tomli"] +[[package]] +name = "cryptography" +version = "42.0.8" +description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "cryptography-42.0.8-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:81d8a521705787afe7a18d5bfb47ea9d9cc068206270aad0b96a725022e18d2e"}, + {file = "cryptography-42.0.8-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:961e61cefdcb06e0c6d7e3a1b22ebe8b996eb2bf50614e89384be54c48c6b63d"}, + {file = "cryptography-42.0.8-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e3ec3672626e1b9e55afd0df6d774ff0e953452886e06e0f1eb7eb0c832e8902"}, + {file = "cryptography-42.0.8-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e599b53fd95357d92304510fb7bda8523ed1f79ca98dce2f43c115950aa78801"}, + {file = "cryptography-42.0.8-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:5226d5d21ab681f432a9c1cf8b658c0cb02533eece706b155e5fbd8a0cdd3949"}, + {file = "cryptography-42.0.8-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:6b7c4f03ce01afd3b76cf69a5455caa9cfa3de8c8f493e0d3ab7d20611c8dae9"}, + {file = "cryptography-42.0.8-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:2346b911eb349ab547076f47f2e035fc8ff2c02380a7cbbf8d87114fa0f1c583"}, + {file = "cryptography-42.0.8-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:ad803773e9df0b92e0a817d22fd8a3675493f690b96130a5e24f1b8fabbea9c7"}, + {file = "cryptography-42.0.8-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:2f66d9cd9147ee495a8374a45ca445819f8929a3efcd2e3df6428e46c3cbb10b"}, + {file = "cryptography-42.0.8-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:d45b940883a03e19e944456a558b67a41160e367a719833c53de6911cabba2b7"}, + {file = "cryptography-42.0.8-cp37-abi3-win32.whl", hash = "sha256:a0c5b2b0585b6af82d7e385f55a8bc568abff8923af147ee3c07bd8b42cda8b2"}, + {file = "cryptography-42.0.8-cp37-abi3-win_amd64.whl", hash = "sha256:57080dee41209e556a9a4ce60d229244f7a66ef52750f813bfbe18959770cfba"}, + {file = "cryptography-42.0.8-cp39-abi3-macosx_10_12_universal2.whl", hash = 
"sha256:dea567d1b0e8bc5764b9443858b673b734100c2871dc93163f58c46a97a83d28"}, + {file = "cryptography-42.0.8-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c4783183f7cb757b73b2ae9aed6599b96338eb957233c58ca8f49a49cc32fd5e"}, + {file = "cryptography-42.0.8-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0608251135d0e03111152e41f0cc2392d1e74e35703960d4190b2e0f4ca9c70"}, + {file = "cryptography-42.0.8-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:dc0fdf6787f37b1c6b08e6dfc892d9d068b5bdb671198c72072828b80bd5fe4c"}, + {file = "cryptography-42.0.8-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:9c0c1716c8447ee7dbf08d6db2e5c41c688544c61074b54fc4564196f55c25a7"}, + {file = "cryptography-42.0.8-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:fff12c88a672ab9c9c1cf7b0c80e3ad9e2ebd9d828d955c126be4fd3e5578c9e"}, + {file = "cryptography-42.0.8-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:cafb92b2bc622cd1aa6a1dce4b93307792633f4c5fe1f46c6b97cf67073ec961"}, + {file = "cryptography-42.0.8-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:31f721658a29331f895a5a54e7e82075554ccfb8b163a18719d342f5ffe5ecb1"}, + {file = "cryptography-42.0.8-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b297f90c5723d04bcc8265fc2a0f86d4ea2e0f7ab4b6994459548d3a6b992a14"}, + {file = "cryptography-42.0.8-cp39-abi3-win32.whl", hash = "sha256:2f88d197e66c65be5e42cd72e5c18afbfae3f741742070e3019ac8f4ac57262c"}, + {file = "cryptography-42.0.8-cp39-abi3-win_amd64.whl", hash = "sha256:fa76fbb7596cc5839320000cdd5d0955313696d9511debab7ee7278fc8b5c84a"}, + {file = "cryptography-42.0.8-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:ba4f0a211697362e89ad822e667d8d340b4d8d55fae72cdd619389fb5912eefe"}, + {file = "cryptography-42.0.8-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:81884c4d096c272f00aeb1f11cf62ccd39763581645b0812e99a91505fa48e0c"}, + {file = 
"cryptography-42.0.8-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c9bb2ae11bfbab395bdd072985abde58ea9860ed84e59dbc0463a5d0159f5b71"}, + {file = "cryptography-42.0.8-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:7016f837e15b0a1c119d27ecd89b3515f01f90a8615ed5e9427e30d9cdbfed3d"}, + {file = "cryptography-42.0.8-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5a94eccb2a81a309806027e1670a358b99b8fe8bfe9f8d329f27d72c094dde8c"}, + {file = "cryptography-42.0.8-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:dec9b018df185f08483f294cae6ccac29e7a6e0678996587363dc352dc65c842"}, + {file = "cryptography-42.0.8-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:343728aac38decfdeecf55ecab3264b015be68fc2816ca800db649607aeee648"}, + {file = "cryptography-42.0.8-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:013629ae70b40af70c9a7a5db40abe5d9054e6f4380e50ce769947b73bf3caad"}, + {file = "cryptography-42.0.8.tar.gz", hash = "sha256:8d09d05439ce7baa8e9e95b07ec5b6c886f548deb7e0f69ef25f64b3bce842f2"}, +] + +[package.dependencies] +cffi = {version = ">=1.12", markers = "platform_python_implementation != \"PyPy\""} + +[package.extras] +docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=1.1.1)"] +docstest = ["pyenchant (>=1.6.11)", "readme-renderer", "sphinxcontrib-spelling (>=4.0.1)"] +nox = ["nox"] +pep8test = ["check-sdist", "click", "mypy", "ruff"] +sdist = ["build"] +ssh = ["bcrypt (>=3.1.5)"] +test = ["certifi", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"] +test-randomorder = ["pytest-randomly"] + [[package]] name = "cycler" version = "0.12.1" @@ -701,13 +1036,13 @@ tests = ["pytest", "pytest-cov", "pytest-xdist"] [[package]] name = "dask" -version = "2024.5.2" +version = "2024.6.2" description = "Parallel PyData with Task Scheduling" optional = false python-versions = ">=3.9" files = [ - {file = "dask-2024.5.2-py3-none-any.whl", hash = 
"sha256:acc2cfe41d9e0151c216ac40396dbe34df13bc3d8c51dfece190349e4f2243af"}, - {file = "dask-2024.5.2.tar.gz", hash = "sha256:5c9722c44d0195e78b6e54197aa3302e6fcaaac2310fd3014560bcb86253dcb3"}, + {file = "dask-2024.6.2-py3-none-any.whl", hash = "sha256:81b80ee015b2e057b93bb2d1bf13a866136e762e2b24bf54b6b621e8b86b7708"}, + {file = "dask-2024.6.2.tar.gz", hash = "sha256:d429d6b19e85fd1306ac37c188aaf99d03bbe69a6fe59d2b42882b2ac188686f"}, ] [package.dependencies] @@ -715,7 +1050,7 @@ bokeh = {version = ">=2.4.2", optional = true, markers = "extra == \"diagnostics click = ">=8.1" cloudpickle = ">=1.5.0" dask-expr = {version = ">=1.1,<1.2", optional = true, markers = "extra == \"dataframe\""} -distributed = {version = "2024.5.2", optional = true, markers = "extra == \"distributed\""} +distributed = {version = "2024.6.2", optional = true, markers = "extra == \"distributed\""} fsspec = ">=2021.09.0" importlib-metadata = {version = ">=4.13.0", markers = "python_version < \"3.12\""} jinja2 = {version = ">=2.10.3", optional = true, markers = "extra == \"diagnostics\""} @@ -734,25 +1069,28 @@ array = ["numpy (>=1.21)"] complete = ["dask[array,dataframe,diagnostics,distributed]", "lz4 (>=4.3.2)", "pyarrow (>=7.0)", "pyarrow-hotfix"] dataframe = ["dask-expr (>=1.1,<1.2)", "dask[array]", "pandas (>=1.3)"] diagnostics = ["bokeh (>=2.4.2)", "jinja2 (>=2.10.3)"] -distributed = ["distributed (==2024.5.2)"] +distributed = ["distributed (==2024.6.2)"] test = ["pandas[test]", "pre-commit", "pytest", "pytest-cov", "pytest-rerunfailures", "pytest-timeout", "pytest-xdist"] [[package]] name = "dask-expr" -version = "1.1.2" +version = "1.1.6" description = "High Level Expressions for Dask" optional = false python-versions = ">=3.9" files = [ - {file = "dask_expr-1.1.2-py3-none-any.whl", hash = "sha256:3be69fb2d449b5edf4404e953b7f6e688426872c6eb10f239539ead716a06f7a"}, - {file = "dask_expr-1.1.2.tar.gz", hash = "sha256:ce2e3803b638cdc67bc75326e1b0d36ea9d231fdddf086e727145a5a2769bed4"}, + {file 
= "dask_expr-1.1.6-py3-none-any.whl", hash = "sha256:04e31cb941b7cbdce7b1384f2bcf17fd17e828e45e9c74491e28473095ee6891"}, + {file = "dask_expr-1.1.6.tar.gz", hash = "sha256:ace366c6d9c248a7fa7b48f7a34140afae3b6a0ea14ee57743babe5a9d1ef43f"}, ] [package.dependencies] -dask = "2024.5.2" +dask = "2024.6.2" pandas = ">=2" pyarrow = ">=7.0.0" +[package.extras] +analyze = ["crick", "distributed"] + [[package]] name = "decorator" version = "5.1.1" @@ -764,6 +1102,23 @@ files = [ {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"}, ] +[[package]] +name = "deprecated" +version = "1.2.14" +description = "Python @deprecated decorator to deprecate old python classes, functions or methods." +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "Deprecated-1.2.14-py2.py3-none-any.whl", hash = "sha256:6fac8b097794a90302bdbb17b9b815e732d3c4720583ff1b198499d78470466c"}, + {file = "Deprecated-1.2.14.tar.gz", hash = "sha256:e5323eb936458dccc2582dc6f9c322c852a775a27065ff2b0c4970b9d53d01b3"}, +] + +[package.dependencies] +wrapt = ">=1.10,<2" + +[package.extras] +dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "sphinx (<2)", "tox"] + [[package]] name = "distlib" version = "0.3.8" @@ -777,19 +1132,19 @@ files = [ [[package]] name = "distributed" -version = "2024.5.2" +version = "2024.6.2" description = "Distributed scheduler for Dask" optional = false python-versions = ">=3.9" files = [ - {file = "distributed-2024.5.2-py3-none-any.whl", hash = "sha256:c0fd59d5c34179d9c9b5dc5acb42a00a06d163107b79f66c2dc73e9479a92286"}, - {file = "distributed-2024.5.2.tar.gz", hash = "sha256:4cee41093e98340d04d9254012c7d521065f64b3f33546dd0b02b00becb41e21"}, + {file = "distributed-2024.6.2-py3-none-any.whl", hash = "sha256:0c1f8ccb1da71273ad8c53c598147dc37e60bef17142fd466cb72618a521880f"}, + {file = "distributed-2024.6.2.tar.gz", hash = 
"sha256:bb43b766ada860b163956607c80f99871d823c645e326c2b5e35f020351adc55"}, ] [package.dependencies] click = ">=8.0" cloudpickle = ">=1.5.0" -dask = "2024.5.2" +dask = "2024.6.2" jinja2 = ">=2.10.3" locket = ">=1.0.0" msgpack = ">=1.0.0" @@ -803,6 +1158,39 @@ tornado = ">=6.0.4" urllib3 = ">=1.24.3" zict = ">=3.0.0" +[[package]] +name = "docker" +version = "7.1.0" +description = "A Python library for the Docker Engine API." +optional = false +python-versions = ">=3.8" +files = [ + {file = "docker-7.1.0-py3-none-any.whl", hash = "sha256:c96b93b7f0a746f9e77d325bcfb87422a3d8bd4f03136ae8a85b37f1898d5fc0"}, + {file = "docker-7.1.0.tar.gz", hash = "sha256:ad8c70e6e3f8926cb8a92619b832b4ea5299e2831c14284663184e200546fa6c"}, +] + +[package.dependencies] +pywin32 = {version = ">=304", markers = "sys_platform == \"win32\""} +requests = ">=2.26.0" +urllib3 = ">=1.26.0" + +[package.extras] +dev = ["coverage (==7.2.7)", "pytest (==7.4.2)", "pytest-cov (==4.1.0)", "pytest-timeout (==2.1.0)", "ruff (==0.1.8)"] +docs = ["myst-parser (==0.18.0)", "sphinx (==5.1.1)"] +ssh = ["paramiko (>=2.4.3)"] +websockets = ["websocket-client (>=1.3.0)"] + +[[package]] +name = "docutils" +version = "0.21.2" +description = "Docutils -- Python Documentation Utilities" +optional = false +python-versions = ">=3.9" +files = [ + {file = "docutils-0.21.2-py3-none-any.whl", hash = "sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2"}, + {file = "docutils-0.21.2.tar.gz", hash = "sha256:3a6b18732edf182daa3cd12775bbb338cf5691468f91eeeb109deff6ebfa986f"}, +] + [[package]] name = "exceptiongroup" version = "1.2.1" @@ -831,6 +1219,26 @@ files = [ [package.extras] testing = ["hatch", "pre-commit", "pytest", "tox"] +[[package]] +name = "fabric" +version = "3.2.2" +description = "High level SSH command execution" +optional = false +python-versions = "*" +files = [ + {file = "fabric-3.2.2-py3-none-any.whl", hash = "sha256:91c47c0be68b14936c88b34da8a1f55e5710fd28397dac5d4ff2e21558113a6f"}, + 
{file = "fabric-3.2.2.tar.gz", hash = "sha256:8783ca42e3b0076f08b26901aac6b9d9b1f19c410074e7accfab902c184ff4a3"}, +] + +[package.dependencies] +decorator = ">=5" +deprecated = ">=1.2" +invoke = ">=2.0" +paramiko = ">=2.4" + +[package.extras] +pytest = ["pytest (>=7)"] + [[package]] name = "fasteners" version = "0.19" @@ -844,66 +1252,55 @@ files = [ [[package]] name = "filelock" -version = "3.14.0" +version = "3.15.4" description = "A platform independent file lock." optional = false python-versions = ">=3.8" files = [ - {file = "filelock-3.14.0-py3-none-any.whl", hash = "sha256:43339835842f110ca7ae60f1e1c160714c5a6afd15a2873419ab185334975c0f"}, - {file = "filelock-3.14.0.tar.gz", hash = "sha256:6ea72da3be9b8c82afd3edcf99f2fffbb5076335a5ae4d03248bb5b6c3eae78a"}, + {file = "filelock-3.15.4-py3-none-any.whl", hash = "sha256:6ca1fffae96225dab4c6eaf1c4f4f28cd2568d3ec2a44e15a08520504de468e7"}, + {file = "filelock-3.15.4.tar.gz", hash = "sha256:2207938cbc1844345cb01a5a95524dae30f0ce089eba5b00378295a17e3e90cb"}, ] [package.extras] docs = ["furo (>=2023.9.10)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.25.2)"] -testing = ["covdefaults (>=2.3)", "coverage (>=7.3.2)", "diff-cover (>=8.0.1)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)", "pytest-timeout (>=2.2)"] +testing = ["covdefaults (>=2.3)", "coverage (>=7.3.2)", "diff-cover (>=8.0.1)", "pytest (>=7.4.3)", "pytest-asyncio (>=0.21)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)", "pytest-timeout (>=2.2)", "virtualenv (>=20.26.2)"] typing = ["typing-extensions (>=4.8)"] [[package]] -name = "fiona" -version = "1.9.6" -description = "Fiona reads and writes spatial data files" +name = "flask" +version = "3.0.3" +description = "A simple framework for building complex web applications." 
optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "fiona-1.9.6-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:63e528b5ea3d8b1038d788e7c65117835c787ba7fdc94b1b42f09c2cbc0aaff2"}, - {file = "fiona-1.9.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:918bd27d8625416672e834593970f96dff63215108f81efb876fe5c0bc58a3b4"}, - {file = "fiona-1.9.6-cp310-cp310-manylinux2014_x86_64.whl", hash = "sha256:e313210b30d09ed8f829bf625599e248dadd78622728030221f6526580ff26c5"}, - {file = "fiona-1.9.6-cp310-cp310-win_amd64.whl", hash = "sha256:89095c2d542325ee45894b8837e8048cdbb2f22274934e1be3b673ca628010d7"}, - {file = "fiona-1.9.6-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:98cea6f435843b2119731c6b0470e5b7386aa16b6aa7edabbf1ed93aefe029c3"}, - {file = "fiona-1.9.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f4230eccbd896a79d1ebfa551d84bf90f512f7bcbe1ca61e3f82231321f1a532"}, - {file = "fiona-1.9.6-cp311-cp311-manylinux2014_x86_64.whl", hash = "sha256:48b6218224e96de5e36b5eb259f37160092260e5de0dcd82ca200b1887aa9884"}, - {file = "fiona-1.9.6-cp311-cp311-win_amd64.whl", hash = "sha256:c1dd5fbc29b7303bb87eb683455e8451e1a53bb8faf20ef97fdcd843c9e4a7f6"}, - {file = "fiona-1.9.6-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:42d8a0e5570948d3821c493b6141866d9a4d7a64edad2be4ecbb89f81904baac"}, - {file = "fiona-1.9.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:39819fb8f5ec6d9971cb01b912b4431615a3d3f50c83798565d8ce41917930db"}, - {file = "fiona-1.9.6-cp312-cp312-manylinux2014_x86_64.whl", hash = "sha256:9b53034efdf93ada9295b081e6a8280af7c75496a20df82d4c2ca46d65b85905"}, - {file = "fiona-1.9.6-cp312-cp312-win_amd64.whl", hash = "sha256:1dcd6eca7524535baf2a39d7981b4a46d33ae28c313934a7c3eae62eecf9dfa5"}, - {file = "fiona-1.9.6-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:e5404ed08c711489abcb3a50a184816825b8af06eb73ad2a99e18b8e7b47c96a"}, - {file = "fiona-1.9.6-cp37-cp37m-manylinux2014_x86_64.whl", hash = 
"sha256:53bedd2989e255df1bf3378ae9c06d6d241ec273c280c544bb44ffffebb97fb0"}, - {file = "fiona-1.9.6-cp37-cp37m-win_amd64.whl", hash = "sha256:77653a08564a44e634c44cd74a068d2f55d1d4029edd16d1c8aadcc4d8cc1d2c"}, - {file = "fiona-1.9.6-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:e7617563b36d2be99f048f0d0054b4d765f4aae454398f88f19de9c2c324b7f8"}, - {file = "fiona-1.9.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:50037c3b7a5f6f434b562b5b1a5b664f1caa7a4383b00af23cdb59bfc6ba852c"}, - {file = "fiona-1.9.6-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:bf51846ad602757bf27876f458c5c9f14b09421fac612f64273cc4e3fcabc441"}, - {file = "fiona-1.9.6-cp38-cp38-win_amd64.whl", hash = "sha256:11af1afc1255642a7787fe112c29d01f968f1053e4d4700fc6f3bb879c1622e0"}, - {file = "fiona-1.9.6-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:52e8fec650b72fc5253d8f86b63859acc687182281c29bfacd3930496cf982d1"}, - {file = "fiona-1.9.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c9b92aa1badb2773e7cac19bef3064d73e9d80c67c42f0928db2520a04be6f2f"}, - {file = "fiona-1.9.6-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:0eaffbf3bfae9960484c0c08ea461b0c40e111497f04e9475ebf15ac7a22d9dc"}, - {file = "fiona-1.9.6-cp39-cp39-win_amd64.whl", hash = "sha256:f1b49d51a744874608b689f029766aa1e078dd72e94b44cf8eeef6d7bd2e9051"}, - {file = "fiona-1.9.6.tar.gz", hash = "sha256:791b3494f8b218c06ea56f892bd6ba893dfa23525347761d066fb7738acda3b1"}, -] - -[package.dependencies] -attrs = ">=19.2.0" -certifi = "*" -click = ">=8.0,<9.0" -click-plugins = ">=1.0" -cligj = ">=0.5" -six = "*" + {file = "flask-3.0.3-py3-none-any.whl", hash = "sha256:34e815dfaa43340d1d15a5c3a02b8476004037eb4840b34910c6e21679d288f3"}, + {file = "flask-3.0.3.tar.gz", hash = "sha256:ceb27b0af3823ea2737928a4d99d125a06175b8512c445cbd9a9ce200ef76842"}, +] + +[package.dependencies] +blinker = ">=1.6.2" +click = ">=8.1.3" +itsdangerous = ">=2.1.2" +Jinja2 = ">=3.1.2" +Werkzeug = ">=3.0.0" [package.extras] -all = 
["fiona[calc,s3,test]"] -calc = ["shapely"] -s3 = ["boto3 (>=1.3.1)"] -test = ["fiona[s3]", "pytest (>=7)", "pytest-cov", "pytz"] +async = ["asgiref (>=3.2)"] +dotenv = ["python-dotenv"] + +[[package]] +name = "flask-cors" +version = "4.0.1" +description = "A Flask extension adding a decorator for CORS support" +optional = false +python-versions = "*" +files = [ + {file = "Flask_Cors-4.0.1-py2.py3-none-any.whl", hash = "sha256:f2a704e4458665580c074b714c4627dd5a306b333deb9074d0b1794dfa2fb677"}, + {file = "flask_cors-4.0.1.tar.gz", hash = "sha256:eeb69b342142fdbf4766ad99357a7f3876a2ceb77689dc10ff912aac06c389e4"}, +] + +[package.dependencies] +Flask = ">=0.9" [[package]] name = "flox" @@ -1081,13 +1478,13 @@ files = [ [[package]] name = "fsspec" -version = "2024.6.0" +version = "2024.6.1" description = "File-system specification" optional = false python-versions = ">=3.8" files = [ - {file = "fsspec-2024.6.0-py3-none-any.whl", hash = "sha256:58d7122eb8a1a46f7f13453187bfea4972d66bf01618d37366521b1998034cee"}, - {file = "fsspec-2024.6.0.tar.gz", hash = "sha256:f579960a56e6d8038a9efc8f9c77279ec12e6299aa86b0769a7e9c46b94527c2"}, + {file = "fsspec-2024.6.1-py3-none-any.whl", hash = "sha256:3cb443f8bcd2efb31295a5b9fdb02aee81d8452c80d28f97a6d0959e6cee101e"}, + {file = "fsspec-2024.6.1.tar.gz", hash = "sha256:fad7d7e209dd4c1208e3bbfda706620e0da5142bebbd9c384afb95b07e798e49"}, ] [package.extras] @@ -1118,24 +1515,157 @@ test-downstream = ["aiobotocore (>=2.5.4,<3.0.0)", "dask-expr", "dask[dataframe, test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", 
"smbprotocol", "tqdm", "urllib3", "zarr", "zstandard"] tqdm = ["tqdm"] +[[package]] +name = "fuzzywuzzy" +version = "0.18.0" +description = "Fuzzy string matching in python" +optional = false +python-versions = "*" +files = [ + {file = "fuzzywuzzy-0.18.0-py2.py3-none-any.whl", hash = "sha256:928244b28db720d1e0ee7587acf660ea49d7e4c632569cad4f1cd7e68a5f0993"}, + {file = "fuzzywuzzy-0.18.0.tar.gz", hash = "sha256:45016e92264780e58972dca1b3d939ac864b78437422beecebb3095f8efd00e8"}, +] + +[package.extras] +speedup = ["python-levenshtein (>=0.12)"] + [[package]] name = "geopandas" -version = "0.14.4" +version = "1.0.1" description = "Geographic pandas extensions" optional = false python-versions = ">=3.9" files = [ - {file = "geopandas-0.14.4-py3-none-any.whl", hash = "sha256:3bb6473cb59d51e1a7fe2dbc24a1a063fb0ebdeddf3ce08ddbf8c7ddc99689aa"}, - {file = "geopandas-0.14.4.tar.gz", hash = "sha256:56765be9d58e2c743078085db3bd07dc6be7719f0dbe1dfdc1d705cb80be7c25"}, + {file = "geopandas-1.0.1-py3-none-any.whl", hash = "sha256:01e147d9420cc374d26f51fc23716ac307f32b49406e4bd8462c07e82ed1d3d6"}, + {file = "geopandas-1.0.1.tar.gz", hash = "sha256:b8bf70a5534588205b7a56646e2082fb1de9a03599651b3d80c99ea4c2ca08ab"}, ] [package.dependencies] -fiona = ">=1.8.21" numpy = ">=1.22" packaging = "*" pandas = ">=1.4.0" +pyogrio = ">=0.7.2" pyproj = ">=3.3.0" -shapely = ">=1.8.0" +shapely = ">=2.0.0" + +[package.extras] +all = ["GeoAlchemy2", "SQLAlchemy (>=1.3)", "folium", "geopy", "mapclassify", "matplotlib (>=3.5.0)", "psycopg-binary (>=3.1.0)", "pyarrow (>=8.0.0)", "xyzservices"] +dev = ["black", "codecov", "pre-commit", "pytest (>=3.1.0)", "pytest-cov", "pytest-xdist"] + +[[package]] +name = "gilknocker" +version = "0.4.1" +description = "Knock on the Python GIL, determine how busy it is." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "gilknocker-0.4.1-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:094ee864032e54fafa8e0c87edf62ccc70fcc6322bea76ddf890e9564d1b758e"}, + {file = "gilknocker-0.4.1-cp310-cp310-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:50450a1adf10df257dc68baf9c67437b26e1b85914a79e2b5e7b7c1ae8542dcc"}, + {file = "gilknocker-0.4.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:2421eb3723c32be4f6f1896a8a54a0e761cab3347b236a02b542478f700b8a62"}, + {file = "gilknocker-0.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:00a41675a3feed22a4bcac7461730a3a32678b1ea9b36cd7802003251a333314"}, + {file = "gilknocker-0.4.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:61dd81cd8af27d0b796f7d8f51ae012cad0499bd20cfed1ffb93fa2a0d2cf24c"}, + {file = "gilknocker-0.4.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4e5b87440b828c3b412c9f4592bd963d22e38d3aca71cb31379b1d9581105b52"}, + {file = "gilknocker-0.4.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:51d1eaeb8fc0fefc3335be54a96876bbbcc73b1606887081aa3161ea6709439b"}, + {file = "gilknocker-0.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ba6a363be7994631bc497c33d26f9782d72db4d7e75ba0c08db20d04204c3fd"}, + {file = "gilknocker-0.4.1-cp310-none-win32.whl", hash = "sha256:8aae0a07bf76de63d2adf299e3a342e03dad11df3b8cea65f0c2e74411824dae"}, + {file = "gilknocker-0.4.1-cp310-none-win_amd64.whl", hash = "sha256:34446b914b2831df22594ea24179bb55897f48b93a2efcf9b869cd01aad5f3bd"}, + {file = "gilknocker-0.4.1-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:0a291450033eb9c404ba0b80fd5f522bfe9bf1805a23f88bd665abc32587ac39"}, + {file = "gilknocker-0.4.1-cp311-cp311-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = 
"sha256:66a4eb863229e8f99a867ca7ef57e34f9e757ca3599b4e4e554795370cfb30a6"}, + {file = "gilknocker-0.4.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:90ddf44dde01578801a7fb52421766c7e0dde343b2b220f97f3f3f1f05c4e604"}, + {file = "gilknocker-0.4.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:10895677784c932a61f461171ffa6088561be30c66f8b264745d9999d4532cfe"}, + {file = "gilknocker-0.4.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9a38ad5dc9e573be579802a017ebdc912739dba835494f69b4bd898f4a02e071"}, + {file = "gilknocker-0.4.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4a077837c3ce3b4912f95fbd256a7bbccc1768ec77831637e555e5fc78eec224"}, + {file = "gilknocker-0.4.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:16594eb6e9ea7759ae1f512ea95c931ad1f2d01afb30640d29f1422f4c5827bd"}, + {file = "gilknocker-0.4.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de32005922d3d8044835dcb2374761b96abc7498e8e1510e13e4c052ac0d545e"}, + {file = "gilknocker-0.4.1-cp311-none-win32.whl", hash = "sha256:d2818d2945d019c94a8853e42b930afb3d27aab2674636792f77ed32fedcae4b"}, + {file = "gilknocker-0.4.1-cp311-none-win_amd64.whl", hash = "sha256:b433f8aea88fa001475b644afb44e8932ec44362282e5ad76d21ee1ef4b9ea8f"}, + {file = "gilknocker-0.4.1-cp312-cp312-macosx_10_7_x86_64.whl", hash = "sha256:a247f49b2f6614e61462aada8cc91006eda92edd2d6a3d714eb29d760dbf15b8"}, + {file = "gilknocker-0.4.1-cp312-cp312-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:8f536a27e062cfb9b14d0e73f65029396a65c09e8e354361cfbea431ad86de07"}, + {file = "gilknocker-0.4.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:25668ccd0bf613fd31e2ace9de479d1441a47b0f7867f163a3d0a1df399871e2"}, + {file = "gilknocker-0.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:895bd5f7d446c12e190489a53bd645d5ef9af2ac0a7a9897bc67a2d699d48759"}, + {file = "gilknocker-0.4.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5e83bed6c89dc7af96a02e7a950f4e9c72b2c839af8727cedc598bf26ae89c94"}, + {file = "gilknocker-0.4.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4b70a6252cdd1af4b84feec43c54fdb55577e28dcee3c5bbaaee8eb5aff2a50e"}, + {file = "gilknocker-0.4.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b88aac029d7829de9b35bc739fef72d96911be2ad31fdaa8602a6d89c2d19f8"}, + {file = "gilknocker-0.4.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:185b216313c235507b14f26428c11162677d80b443194e94c37f1abd99a30a43"}, + {file = "gilknocker-0.4.1-cp312-none-win32.whl", hash = "sha256:602514d9e1da006c35de249909046410635b15fb65886e96113c71b3a9865300"}, + {file = "gilknocker-0.4.1-cp312-none-win_amd64.whl", hash = "sha256:89d08af4071088cf1a5fa154ea35236503c86b7dd12429945b1bcd5bb1d62bef"}, + {file = "gilknocker-0.4.1-cp37-cp37m-macosx_10_7_x86_64.whl", hash = "sha256:f987a0c37f0b44eb4f45051ab33cb0c5ae1381bdfacffdb06c89b28bab03a409"}, + {file = "gilknocker-0.4.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:8e2436acccdf75bb8622d35ea5af693d2fdcc9e63567a8424af6e863973a917f"}, + {file = "gilknocker-0.4.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a6fd1c92524130617a5eb142d7f4f16d874609c1835db3a197183db50d3f8d2"}, + {file = "gilknocker-0.4.1-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5829c1cc5c2844413148fa37782a38a0c17ad4e9529d1e6e267308e99a95381e"}, + {file = "gilknocker-0.4.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ead828a9a0e145634640b58c4a3dcd6c8375c60f99284b4afb919da29d97c05d"}, + {file = "gilknocker-0.4.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:07c0e2394d040330d5c6de3f797901407071d1e8813c95a18f97f1fe02cf5f0a"}, + {file = "gilknocker-0.4.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ff2f4b7e09efb3aa316ecc3d6df7df1c8d50053919e5c8e933ddc85aa932148"}, + {file = "gilknocker-0.4.1-cp37-none-win32.whl", hash = "sha256:c228837d176cbc601ee30962c9b79d1336ed350f7ac7ef67dc889afb09eb1979"}, + {file = "gilknocker-0.4.1-cp37-none-win_amd64.whl", hash = "sha256:0f929bb718bde531608f9da9317d0e06bcc6b64d78021c1ba43782f0ab02d763"}, + {file = "gilknocker-0.4.1-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:a83d4a1fb1501c45ac1c7f6fec55b2f7ad6efea4c1f31504055f2b7bac62792a"}, + {file = "gilknocker-0.4.1-cp38-cp38-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:7a37ad5f0ea74627a2adf392e7d1ff5930c44275a47e65e2853719a43c441707"}, + {file = "gilknocker-0.4.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:9b9d43848f015e95d6f1645f52d96e5994efbb96456c57161acb164adbdc53d9"}, + {file = "gilknocker-0.4.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b6246e55ceee31fd04c624741ecd753402334515a4db191a32f7b3bfc46d3020"}, + {file = "gilknocker-0.4.1-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3def27c21e2241c5a027784560e694d2e246bb9ee6d1ad5fa62f8587affc8742"}, + {file = "gilknocker-0.4.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:34fb214872a506ebc19a27fc398f7cc837138b11eb816a028ae7e4cd9202feec"}, + {file = "gilknocker-0.4.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5cfdd4e033119dabff9d6b2166d11ea8775a29c958b648896c5c88608ebfea7b"}, + {file = "gilknocker-0.4.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2288fbb606c457b3103306c9e72f124782ccc7a48b1fff5dd0b0ea379f2ee6cc"}, + {file = "gilknocker-0.4.1-cp38-none-win32.whl", hash = 
"sha256:b2a56dec5155968c12b9463c662b1cfaec87329ba59b6bbba4944907e6e7bb10"}, + {file = "gilknocker-0.4.1-cp38-none-win_amd64.whl", hash = "sha256:9e82fcb264663edcf4d2478d929be873c3633ea339fe676a915b1f468499b39a"}, + {file = "gilknocker-0.4.1-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:ca95db3f4a73d60cfbfecdea6aacea9a10931ae8bfe010db70bad214eebad55c"}, + {file = "gilknocker-0.4.1-cp39-cp39-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:8b38a0e2f9231e7324c832099dcad3ec4c2f5b3a48105ba8e1d2496cf245feb0"}, + {file = "gilknocker-0.4.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:8e6386b21256a3ab35d97df9f151b716278ee3f6e99ca2b4b587bb43d60d1516"}, + {file = "gilknocker-0.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:33b5fc51e19fe89dc661942c493de4946e6ba0cb642a15c187627548cd2ef329"}, + {file = "gilknocker-0.4.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5910d9ff1530848a1b941d3fd3cfb6d376439b6788ed3e7319cc0ecffb6f4caf"}, + {file = "gilknocker-0.4.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0a81ef1b79c7093eb26e3affd73963b224e3be41fc19cf5cb175bb05da834579"}, + {file = "gilknocker-0.4.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8290247beb092d6ca73a75a2bdee19d8ca89cf18dfeec23e9c3b8826b9a929e6"}, + {file = "gilknocker-0.4.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c2bc4daceb5934cecfec12c703ebe584528f2af57ef475697d7d3d50ce0e1757"}, + {file = "gilknocker-0.4.1-cp39-none-win32.whl", hash = "sha256:f0c87669639439c5073ad4aae30ea70c00f2567f56cf8623f43a7062217a732c"}, + {file = "gilknocker-0.4.1-cp39-none-win_amd64.whl", hash = "sha256:d0636b90a0256c2b3e7fa58deb8e7d55e6a1fe90133c2715fec47a26b999d366"}, + {file = "gilknocker-0.4.1-pp37-pypy37_pp73-macosx_10_7_x86_64.whl", hash = "sha256:0873037b0d4f9efd00ec1a46596f198509052818c7ed729517248a14005ad4be"}, + {file = 
"gilknocker-0.4.1-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9a9fd6b66059d8bfcb8c77601fc667e3a83ca1c970923f137476427bbc61bb5"}, + {file = "gilknocker-0.4.1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:153fe62a970dd549ca7a0898b13db5130bd8d3987d7d8688bd75e553a50f680f"}, + {file = "gilknocker-0.4.1-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:aad96df6d3859045860ebcb94e3687c72e0ebca2856300262bc87356952ec51c"}, + {file = "gilknocker-0.4.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f65388813d204e4722efe1d258f0b56d4205b2a0621e3ef37eaca90827dbc4be"}, + {file = "gilknocker-0.4.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b99617fed8e8023b9af0e4b9fba2c1d98d701bf05ae607ae4a4e8f7ac609151e"}, + {file = "gilknocker-0.4.1-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:7d332a47adfa16e9e461af00909349e6c81bd63ac58d2ce0730e30b87cc1a063"}, + {file = "gilknocker-0.4.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:217c081ca35de545d1c76ef063e48b6a88e062132cee96a5afdd172473ee4161"}, + {file = "gilknocker-0.4.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2d566f50deb089a64ef49d2174fc046c56b1d1e77519dad0c377b66c9a2a9755"}, + {file = "gilknocker-0.4.1.tar.gz", hash = "sha256:0a9ce42b50221e8ea9572e28847ec46a1a527124a25e6f6f7a0f1d2668c9241c"}, +] + +[[package]] +name = "graphql-core" +version = "3.2.3" +description = "GraphQL implementation for Python, a port of GraphQL.js, the JavaScript reference implementation for GraphQL." 
+optional = false +python-versions = ">=3.6,<4" +files = [ + {file = "graphql-core-3.2.3.tar.gz", hash = "sha256:06d2aad0ac723e35b1cb47885d3e5c45e956a53bc1b209a9fc5369007fe46676"}, + {file = "graphql_core-3.2.3-py3-none-any.whl", hash = "sha256:5766780452bd5ec8ba133f8bf287dc92713e3868ddd83aee4faab9fc3e303dc3"}, +] + +[[package]] +name = "h11" +version = "0.14.0" +description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" +optional = false +python-versions = ">=3.7" +files = [ + {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, + {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, +] + +[[package]] +name = "h2" +version = "4.1.0" +description = "HTTP/2 State-Machine based protocol implementation" +optional = false +python-versions = ">=3.6.1" +files = [ + {file = "h2-4.1.0-py3-none-any.whl", hash = "sha256:03a46bcf682256c95b5fd9e9a99c1323584c3eec6440d379b9903d709476bc6d"}, + {file = "h2-4.1.0.tar.gz", hash = "sha256:a83aca08fbe7aacb79fec788c9c0bac936343560ed9ec18b82a13a12c28d2abb"}, +] + +[package.dependencies] +hpack = ">=4.0,<5" +hyperframe = ">=6.0,<7" [[package]] name = "h5netcdf" @@ -1188,15 +1718,83 @@ files = [ [package.dependencies] numpy = ">=1.17.3" +[[package]] +name = "hpack" +version = "4.0.0" +description = "Pure-Python HPACK header compression" +optional = false +python-versions = ">=3.6.1" +files = [ + {file = "hpack-4.0.0-py3-none-any.whl", hash = "sha256:84a076fad3dc9a9f8063ccb8041ef100867b1878b25ef0ee63847a5d53818a6c"}, + {file = "hpack-4.0.0.tar.gz", hash = "sha256:fc41de0c63e687ebffde81187a948221294896f6bdc0ae2312708df339430095"}, +] + +[[package]] +name = "httpcore" +version = "1.0.5" +description = "A minimal low-level HTTP client." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "httpcore-1.0.5-py3-none-any.whl", hash = "sha256:421f18bac248b25d310f3cacd198d55b8e6125c107797b609ff9b7a6ba7991b5"}, + {file = "httpcore-1.0.5.tar.gz", hash = "sha256:34a38e2f9291467ee3b44e89dd52615370e152954ba21721378a87b2960f7a61"}, +] + +[package.dependencies] +certifi = "*" +h11 = ">=0.13,<0.15" + +[package.extras] +asyncio = ["anyio (>=4.0,<5.0)"] +http2 = ["h2 (>=3,<5)"] +socks = ["socksio (==1.*)"] +trio = ["trio (>=0.22.0,<0.26.0)"] + +[[package]] +name = "httpx" +version = "0.27.0" +description = "The next generation HTTP client." +optional = false +python-versions = ">=3.8" +files = [ + {file = "httpx-0.27.0-py3-none-any.whl", hash = "sha256:71d5465162c13681bff01ad59b2cc68dd838ea1f10e51574bac27103f00c91a5"}, + {file = "httpx-0.27.0.tar.gz", hash = "sha256:a0cb88a46f32dc874e04ee956e4c2764aba2aa228f650b06788ba6bda2962ab5"}, +] + +[package.dependencies] +anyio = "*" +certifi = "*" +h2 = {version = ">=3,<5", optional = true, markers = "extra == \"http2\""} +httpcore = "==1.*" +idna = "*" +sniffio = "*" + +[package.extras] +brotli = ["brotli", "brotlicffi"] +cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] +http2 = ["h2 (>=3,<5)"] +socks = ["socksio (==1.*)"] + +[[package]] +name = "hyperframe" +version = "6.0.1" +description = "HTTP/2 framing layer for Python" +optional = false +python-versions = ">=3.6.1" +files = [ + {file = "hyperframe-6.0.1-py3-none-any.whl", hash = "sha256:0ec6bafd80d8ad2195c4f03aacba3a8265e57bc4cff261e802bf39970ed02a15"}, + {file = "hyperframe-6.0.1.tar.gz", hash = "sha256:ae510046231dc8e9ecb1a6586f63d2347bf4c8905914aa84ba585ae85f28a914"}, +] + [[package]] name = "hypothesis" -version = "6.103.1" +version = "6.104.2" description = "A library for property-based testing" optional = false python-versions = ">=3.8" files = [ - {file = "hypothesis-6.103.1-py3-none-any.whl", hash = "sha256:d3c959fab6233e78867499e2117ae9db8dc40eeed936d71a2cfc7b6094972e74"}, - 
{file = "hypothesis-6.103.1.tar.gz", hash = "sha256:d299d5c21d6408eab3be670c94c974f3acf0b511c61fe81804b09091e393ee1f"}, + {file = "hypothesis-6.104.2-py3-none-any.whl", hash = "sha256:8b52b7e2462e552c75b819495d5cb6251a2b840accc79cf2ce52588004c915d9"}, + {file = "hypothesis-6.104.2.tar.gz", hash = "sha256:6f2a1489bc8fe1c87ffd202707319b66ec46b2bc11faf6e0161e957b8b9b1eab"}, ] [package.dependencies] @@ -1205,10 +1803,10 @@ exceptiongroup = {version = ">=1.0.0", markers = "python_version < \"3.11\""} sortedcontainers = ">=2.1.0,<3.0.0" [package.extras] -all = ["backports.zoneinfo (>=0.2.1)", "black (>=19.10b0)", "click (>=7.0)", "crosshair-tool (>=0.0.54)", "django (>=3.2)", "dpcontracts (>=0.4)", "hypothesis-crosshair (>=0.0.4)", "lark (>=0.10.1)", "libcst (>=0.3.16)", "numpy (>=1.17.3)", "pandas (>=1.1)", "pytest (>=4.6)", "python-dateutil (>=1.4)", "pytz (>=2014.1)", "redis (>=3.0.0)", "rich (>=9.0.0)", "tzdata (>=2024.1)"] +all = ["backports.zoneinfo (>=0.2.1)", "black (>=19.10b0)", "click (>=7.0)", "crosshair-tool (>=0.0.55)", "django (>=3.2)", "dpcontracts (>=0.4)", "hypothesis-crosshair (>=0.0.4)", "lark (>=0.10.1)", "libcst (>=0.3.16)", "numpy (>=1.17.3)", "pandas (>=1.1)", "pytest (>=4.6)", "python-dateutil (>=1.4)", "pytz (>=2014.1)", "redis (>=3.0.0)", "rich (>=9.0.0)", "tzdata (>=2024.1)"] cli = ["black (>=19.10b0)", "click (>=7.0)", "rich (>=9.0.0)"] codemods = ["libcst (>=0.3.16)"] -crosshair = ["crosshair-tool (>=0.0.54)", "hypothesis-crosshair (>=0.0.4)"] +crosshair = ["crosshair-tool (>=0.0.55)", "hypothesis-crosshair (>=0.0.4)"] dateutil = ["python-dateutil (>=1.4)"] django = ["django (>=3.2)"] dpcontracts = ["dpcontracts (>=0.4)"] @@ -1246,24 +1844,35 @@ files = [ {file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"}, ] +[[package]] +name = "imagesize" +version = "1.4.1" +description = "Getting image size from png/jpeg/jpeg2000/gif file" +optional = false +python-versions = ">=2.7, !=3.0.*, 
!=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "imagesize-1.4.1-py2.py3-none-any.whl", hash = "sha256:0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b"}, + {file = "imagesize-1.4.1.tar.gz", hash = "sha256:69150444affb9cb0d5cc5a92b3676f0b2fb7cd9ae39e947a5e11a36b4497cd4a"}, +] + [[package]] name = "importlib-metadata" -version = "7.1.0" +version = "8.0.0" description = "Read metadata from Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "importlib_metadata-7.1.0-py3-none-any.whl", hash = "sha256:30962b96c0c223483ed6cc7280e7f0199feb01a0e40cfae4d4450fc6fab1f570"}, - {file = "importlib_metadata-7.1.0.tar.gz", hash = "sha256:b78938b926ee8d5f020fc4772d487045805a55ddbad2ecf21c6d60938dc7fcd2"}, + {file = "importlib_metadata-8.0.0-py3-none-any.whl", hash = "sha256:15584cf2b1bf449d98ff8a6ff1abef57bf20f3ac6454f431736cd3e660921b2f"}, + {file = "importlib_metadata-8.0.0.tar.gz", hash = "sha256:188bd24e4c346d3f0a933f275c2fec67050326a856b9a359881d7c2a697e8812"}, ] [package.dependencies] zipp = ">=0.5" [package.extras] -docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] perf = ["ipython"] -testing = ["flufl.flake8", "importlib-resources (>=1.3)", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-perf (>=0.9.2)", "pytest-ruff (>=0.2.1)"] +test = ["flufl.flake8", "importlib-resources (>=1.3)", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-perf (>=0.9.2)", "pytest-ruff (>=0.2.1)"] [[package]] name = "iniconfig" @@ -1276,6 +1885,17 @@ files = [ {file = "iniconfig-2.0.0.tar.gz", hash = 
"sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, ] +[[package]] +name = "invoke" +version = "2.2.0" +description = "Pythonic task execution" +optional = false +python-versions = ">=3.6" +files = [ + {file = "invoke-2.2.0-py3-none-any.whl", hash = "sha256:6ea924cc53d4f78e3d98bc436b08069a03077e6f85ad1ddaa8a116d7dad15820"}, + {file = "invoke-2.2.0.tar.gz", hash = "sha256:ee6cbb101af1a859c7fe84f2a264c059020b0cb7fe3535f9424300ab568f6bd5"}, +] + [[package]] name = "ipdb" version = "0.13.13" @@ -1328,6 +1948,38 @@ parallel = ["ipyparallel"] qtconsole = ["qtconsole"] test = ["ipykernel", "nbformat", "nose (>=0.10.1)", "numpy (>=1.17)", "pygments", "requests", "testpath"] +[[package]] +name = "ipywidgets" +version = "8.1.3" +description = "Jupyter interactive widgets" +optional = false +python-versions = ">=3.7" +files = [ + {file = "ipywidgets-8.1.3-py3-none-any.whl", hash = "sha256:efafd18f7a142248f7cb0ba890a68b96abd4d6e88ddbda483c9130d12667eaf2"}, + {file = "ipywidgets-8.1.3.tar.gz", hash = "sha256:f5f9eeaae082b1823ce9eac2575272952f40d748893972956dc09700a6392d9c"}, +] + +[package.dependencies] +comm = ">=0.1.3" +ipython = ">=6.1.0" +jupyterlab-widgets = ">=3.0.11,<3.1.0" +traitlets = ">=4.3.1" +widgetsnbextension = ">=4.0.11,<4.1.0" + +[package.extras] +test = ["ipykernel", "jsonschema", "pytest (>=3.6.0)", "pytest-cov", "pytz"] + +[[package]] +name = "itsdangerous" +version = "2.2.0" +description = "Safely pass data to untrusted environments and back." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "itsdangerous-2.2.0-py3-none-any.whl", hash = "sha256:c6242fc49e35958c8b15141343aa660db5fc54d4f13a1db01a3f5891b98700ef"}, + {file = "itsdangerous-2.2.0.tar.gz", hash = "sha256:e0050c0b7da1eea53ffaf149c0cfbb5c6e2e2b69c4bef22c81fa6eb73e5f6173"}, +] + [[package]] name = "jedi" version = "0.19.1" @@ -1375,6 +2027,79 @@ files = [ {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, ] +[[package]] +name = "joserfc" +version = "0.12.0" +description = "The ultimate Python library for JOSE RFCs, including JWS, JWE, JWK, JWA, JWT" +optional = false +python-versions = ">=3.8" +files = [ + {file = "joserfc-0.12.0-py3-none-any.whl", hash = "sha256:210f21ec1c3d08c9a0d9969d7825d2020f365ad6b3a0d6c6c0b638704f96a5b0"}, + {file = "joserfc-0.12.0.tar.gz", hash = "sha256:86625aef30bb9857f8c2f4320ea2ad4342a29319a66189cb743547c74a88b1ec"}, +] + +[package.dependencies] +cryptography = "*" + +[package.extras] +drafts = ["pycryptodome"] + +[[package]] +name = "jsondiff" +version = "2.1.1" +description = "Diff JSON and JSON-like structures in Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "jsondiff-2.1.1-py3-none-any.whl", hash = "sha256:ffab5bc00237c2c9f48a4b07fff7bf7df13e4b98f9585bd00b6e6e5f371a98fc"}, + {file = "jsondiff-2.1.1.tar.gz", hash = "sha256:c7dfd4f8c9307500a536e9b93492b2c1ba62dac2b3c5189aa6e37d63b427b4d8"}, +] + +[package.dependencies] +pyyaml = "*" + +[package.extras] +dev = ["build", "hypothesis", "pytest", "setuptools-scm"] + +[[package]] +name = "jsonpatch" +version = "1.33" +description = "Apply JSON-Patches (RFC 6902)" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" +files = [ + {file = "jsonpatch-1.33-py2.py3-none-any.whl", hash = "sha256:0ae28c0cd062bbd8b8ecc26d7d164fbbea9652a1a3693f3b956c1eae5145dade"}, + {file = "jsonpatch-1.33.tar.gz", hash = 
"sha256:9fcd4009c41e6d12348b4a0ff2563ba56a2923a7dfee731d004e212e1ee5030c"}, +] + +[package.dependencies] +jsonpointer = ">=1.9" + +[[package]] +name = "jsonpath-ng" +version = "1.6.1" +description = "A final implementation of JSONPath for Python that aims to be standard compliant, including arithmetic and binary comparison operators and providing clear AST for metaprogramming." +optional = false +python-versions = "*" +files = [ + {file = "jsonpath-ng-1.6.1.tar.gz", hash = "sha256:086c37ba4917304850bd837aeab806670224d3f038fe2833ff593a672ef0a5fa"}, + {file = "jsonpath_ng-1.6.1-py3-none-any.whl", hash = "sha256:8f22cd8273d7772eea9aaa84d922e0841aa36fdb8a2c6b7f6c3791a16a9bc0be"}, +] + +[package.dependencies] +ply = "*" + +[[package]] +name = "jsonpointer" +version = "3.0.0" +description = "Identify specific nodes in a JSON document (RFC 6901)" +optional = false +python-versions = ">=3.7" +files = [ + {file = "jsonpointer-3.0.0-py2.py3-none-any.whl", hash = "sha256:13e088adc14fca8b6aa8177c044e12701e6ad4b28ff10e65f2267a90109c9942"}, + {file = "jsonpointer-3.0.0.tar.gz", hash = "sha256:2b2d729f2091522d61c3b31f82e11870f60b68f43fbc705cb76bf4b832af59ef"}, +] + [[package]] name = "jsonschema" version = "4.22.0" @@ -1396,6 +2121,23 @@ rpds-py = ">=0.7.1" format = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3987", "uri-template", "webcolors (>=1.11)"] format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "uri-template", "webcolors (>=1.11)"] +[[package]] +name = "jsonschema-path" +version = "0.3.3" +description = "JSONSchema Spec with object-oriented paths" +optional = false +python-versions = "<4.0.0,>=3.8.0" +files = [ + {file = "jsonschema_path-0.3.3-py3-none-any.whl", hash = "sha256:203aff257f8038cd3c67be614fe6b2001043408cb1b4e36576bc4921e09d83c4"}, + {file = "jsonschema_path-0.3.3.tar.gz", hash = "sha256:f02e5481a4288ec062f8e68c808569e427d905bedfecb7f2e4c69ef77957c382"}, 
+] + +[package.dependencies] +pathable = ">=0.4.1,<0.5.0" +PyYAML = ">=5.1" +referencing = ">=0.28.0,<0.36.0" +requests = ">=2.31.0,<3.0.0" + [[package]] name = "jsonschema-specifications" version = "2023.12.1" @@ -1410,6 +2152,17 @@ files = [ [package.dependencies] referencing = ">=0.31.0" +[[package]] +name = "jupyterlab-widgets" +version = "3.0.11" +description = "Jupyter interactive widgets for JupyterLab" +optional = false +python-versions = ">=3.7" +files = [ + {file = "jupyterlab_widgets-3.0.11-py3-none-any.whl", hash = "sha256:78287fd86d20744ace330a61625024cf5521e1c012a352ddc0a3cdc2348becd0"}, + {file = "jupyterlab_widgets-3.0.11.tar.gz", hash = "sha256:dd5ac679593c969af29c9bed054c24f26842baa51352114736756bc035deee27"}, +] + [[package]] name = "kiwisolver" version = "1.4.5" @@ -1523,34 +2276,80 @@ files = [ {file = "kiwisolver-1.4.5.tar.gz", hash = "sha256:e57e563a57fb22a142da34f38acc2fc1a5c864bc29ca1517a88abc963e60d6ec"}, ] +[[package]] +name = "lazy-object-proxy" +version = "1.10.0" +description = "A fast and thorough lazy object proxy." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "lazy-object-proxy-1.10.0.tar.gz", hash = "sha256:78247b6d45f43a52ef35c25b5581459e85117225408a4128a3daf8bf9648ac69"}, + {file = "lazy_object_proxy-1.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:855e068b0358ab916454464a884779c7ffa312b8925c6f7401e952dcf3b89977"}, + {file = "lazy_object_proxy-1.10.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7ab7004cf2e59f7c2e4345604a3e6ea0d92ac44e1c2375527d56492014e690c3"}, + {file = "lazy_object_proxy-1.10.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc0d2fc424e54c70c4bc06787e4072c4f3b1aa2f897dfdc34ce1013cf3ceef05"}, + {file = "lazy_object_proxy-1.10.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e2adb09778797da09d2b5ebdbceebf7dd32e2c96f79da9052b2e87b6ea495895"}, + {file = "lazy_object_proxy-1.10.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b1f711e2c6dcd4edd372cf5dec5c5a30d23bba06ee012093267b3376c079ec83"}, + {file = "lazy_object_proxy-1.10.0-cp310-cp310-win32.whl", hash = "sha256:76a095cfe6045c7d0ca77db9934e8f7b71b14645f0094ffcd842349ada5c5fb9"}, + {file = "lazy_object_proxy-1.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:b4f87d4ed9064b2628da63830986c3d2dca7501e6018347798313fcf028e2fd4"}, + {file = "lazy_object_proxy-1.10.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:fec03caabbc6b59ea4a638bee5fce7117be8e99a4103d9d5ad77f15d6f81020c"}, + {file = "lazy_object_proxy-1.10.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:02c83f957782cbbe8136bee26416686a6ae998c7b6191711a04da776dc9e47d4"}, + {file = "lazy_object_proxy-1.10.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:009e6bb1f1935a62889ddc8541514b6a9e1fcf302667dcb049a0be5c8f613e56"}, + {file = "lazy_object_proxy-1.10.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = 
"sha256:75fc59fc450050b1b3c203c35020bc41bd2695ed692a392924c6ce180c6f1dc9"}, + {file = "lazy_object_proxy-1.10.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:782e2c9b2aab1708ffb07d4bf377d12901d7a1d99e5e410d648d892f8967ab1f"}, + {file = "lazy_object_proxy-1.10.0-cp311-cp311-win32.whl", hash = "sha256:edb45bb8278574710e68a6b021599a10ce730d156e5b254941754a9cc0b17d03"}, + {file = "lazy_object_proxy-1.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:e271058822765ad5e3bca7f05f2ace0de58a3f4e62045a8c90a0dfd2f8ad8cc6"}, + {file = "lazy_object_proxy-1.10.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e98c8af98d5707dcdecc9ab0863c0ea6e88545d42ca7c3feffb6b4d1e370c7ba"}, + {file = "lazy_object_proxy-1.10.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:952c81d415b9b80ea261d2372d2a4a2332a3890c2b83e0535f263ddfe43f0d43"}, + {file = "lazy_object_proxy-1.10.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80b39d3a151309efc8cc48675918891b865bdf742a8616a337cb0090791a0de9"}, + {file = "lazy_object_proxy-1.10.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:e221060b701e2aa2ea991542900dd13907a5c90fa80e199dbf5a03359019e7a3"}, + {file = "lazy_object_proxy-1.10.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:92f09ff65ecff3108e56526f9e2481b8116c0b9e1425325e13245abfd79bdb1b"}, + {file = "lazy_object_proxy-1.10.0-cp312-cp312-win32.whl", hash = "sha256:3ad54b9ddbe20ae9f7c1b29e52f123120772b06dbb18ec6be9101369d63a4074"}, + {file = "lazy_object_proxy-1.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:127a789c75151db6af398b8972178afe6bda7d6f68730c057fbbc2e96b08d282"}, + {file = "lazy_object_proxy-1.10.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9e4ed0518a14dd26092614412936920ad081a424bdcb54cc13349a8e2c6d106a"}, + {file = "lazy_object_proxy-1.10.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:5ad9e6ed739285919aa9661a5bbed0aaf410aa60231373c5579c6b4801bd883c"}, + {file = "lazy_object_proxy-1.10.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2fc0a92c02fa1ca1e84fc60fa258458e5bf89d90a1ddaeb8ed9cc3147f417255"}, + {file = "lazy_object_proxy-1.10.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:0aefc7591920bbd360d57ea03c995cebc204b424524a5bd78406f6e1b8b2a5d8"}, + {file = "lazy_object_proxy-1.10.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5faf03a7d8942bb4476e3b62fd0f4cf94eaf4618e304a19865abf89a35c0bbee"}, + {file = "lazy_object_proxy-1.10.0-cp38-cp38-win32.whl", hash = "sha256:e333e2324307a7b5d86adfa835bb500ee70bfcd1447384a822e96495796b0ca4"}, + {file = "lazy_object_proxy-1.10.0-cp38-cp38-win_amd64.whl", hash = "sha256:cb73507defd385b7705c599a94474b1d5222a508e502553ef94114a143ec6696"}, + {file = "lazy_object_proxy-1.10.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:366c32fe5355ef5fc8a232c5436f4cc66e9d3e8967c01fb2e6302fd6627e3d94"}, + {file = "lazy_object_proxy-1.10.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2297f08f08a2bb0d32a4265e98a006643cd7233fb7983032bd61ac7a02956b3b"}, + {file = "lazy_object_proxy-1.10.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18dd842b49456aaa9a7cf535b04ca4571a302ff72ed8740d06b5adcd41fe0757"}, + {file = "lazy_object_proxy-1.10.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:217138197c170a2a74ca0e05bddcd5f1796c735c37d0eee33e43259b192aa424"}, + {file = "lazy_object_proxy-1.10.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9a3a87cf1e133e5b1994144c12ca4aa3d9698517fe1e2ca82977781b16955658"}, + {file = "lazy_object_proxy-1.10.0-cp39-cp39-win32.whl", hash = "sha256:30b339b2a743c5288405aa79a69e706a06e02958eab31859f7f3c04980853b70"}, + {file = "lazy_object_proxy-1.10.0-cp39-cp39-win_amd64.whl", hash = 
"sha256:a899b10e17743683b293a729d3a11f2f399e8a90c73b089e29f5d0fe3509f0dd"}, + {file = "lazy_object_proxy-1.10.0-pp310.pp311.pp312.pp38.pp39-none-any.whl", hash = "sha256:80fa48bd89c8f2f456fc0765c11c23bf5af827febacd2f523ca5bc1893fcc09d"}, +] + [[package]] name = "llvmlite" -version = "0.42.0" +version = "0.43.0" description = "lightweight wrapper around basic LLVM functionality" optional = false python-versions = ">=3.9" files = [ - {file = "llvmlite-0.42.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3366938e1bf63d26c34fbfb4c8e8d2ded57d11e0567d5bb243d89aab1eb56098"}, - {file = "llvmlite-0.42.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c35da49666a21185d21b551fc3caf46a935d54d66969d32d72af109b5e7d2b6f"}, - {file = "llvmlite-0.42.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70f44ccc3c6220bd23e0ba698a63ec2a7d3205da0d848804807f37fc243e3f77"}, - {file = "llvmlite-0.42.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:763f8d8717a9073b9e0246998de89929071d15b47f254c10eef2310b9aac033d"}, - {file = "llvmlite-0.42.0-cp310-cp310-win_amd64.whl", hash = "sha256:8d90edf400b4ceb3a0e776b6c6e4656d05c7187c439587e06f86afceb66d2be5"}, - {file = "llvmlite-0.42.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ae511caed28beaf1252dbaf5f40e663f533b79ceb408c874c01754cafabb9cbf"}, - {file = "llvmlite-0.42.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:81e674c2fe85576e6c4474e8c7e7aba7901ac0196e864fe7985492b737dbab65"}, - {file = "llvmlite-0.42.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb3975787f13eb97629052edb5017f6c170eebc1c14a0433e8089e5db43bcce6"}, - {file = "llvmlite-0.42.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c5bece0cdf77f22379f19b1959ccd7aee518afa4afbd3656c6365865f84903f9"}, - {file = "llvmlite-0.42.0-cp311-cp311-win_amd64.whl", hash = "sha256:7e0c4c11c8c2aa9b0701f91b799cb9134a6a6de51444eff5a9087fc7c1384275"}, - {file = 
"llvmlite-0.42.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:08fa9ab02b0d0179c688a4216b8939138266519aaa0aa94f1195a8542faedb56"}, - {file = "llvmlite-0.42.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b2fce7d355068494d1e42202c7aff25d50c462584233013eb4470c33b995e3ee"}, - {file = "llvmlite-0.42.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ebe66a86dc44634b59a3bc860c7b20d26d9aaffcd30364ebe8ba79161a9121f4"}, - {file = "llvmlite-0.42.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d47494552559e00d81bfb836cf1c4d5a5062e54102cc5767d5aa1e77ccd2505c"}, - {file = "llvmlite-0.42.0-cp312-cp312-win_amd64.whl", hash = "sha256:05cb7e9b6ce69165ce4d1b994fbdedca0c62492e537b0cc86141b6e2c78d5888"}, - {file = "llvmlite-0.42.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bdd3888544538a94d7ec99e7c62a0cdd8833609c85f0c23fcb6c5c591aec60ad"}, - {file = "llvmlite-0.42.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d0936c2067a67fb8816c908d5457d63eba3e2b17e515c5fe00e5ee2bace06040"}, - {file = "llvmlite-0.42.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a78ab89f1924fc11482209f6799a7a3fc74ddc80425a7a3e0e8174af0e9e2301"}, - {file = "llvmlite-0.42.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7599b65c7af7abbc978dbf345712c60fd596aa5670496561cc10e8a71cebfb2"}, - {file = "llvmlite-0.42.0-cp39-cp39-win_amd64.whl", hash = "sha256:43d65cc4e206c2e902c1004dd5418417c4efa6c1d04df05c6c5675a27e8ca90e"}, - {file = "llvmlite-0.42.0.tar.gz", hash = "sha256:f92b09243c0cc3f457da8b983f67bd8e1295d0f5b3746c7a1861d7a99403854a"}, + {file = "llvmlite-0.43.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a289af9a1687c6cf463478f0fa8e8aa3b6fb813317b0d70bf1ed0759eab6f761"}, + {file = "llvmlite-0.43.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6d4fd101f571a31acb1559ae1af30f30b1dc4b3186669f92ad780e17c81e91bc"}, + {file = 
"llvmlite-0.43.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7d434ec7e2ce3cc8f452d1cd9a28591745de022f931d67be688a737320dfcead"}, + {file = "llvmlite-0.43.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6912a87782acdff6eb8bf01675ed01d60ca1f2551f8176a300a886f09e836a6a"}, + {file = "llvmlite-0.43.0-cp310-cp310-win_amd64.whl", hash = "sha256:14f0e4bf2fd2d9a75a3534111e8ebeb08eda2f33e9bdd6dfa13282afacdde0ed"}, + {file = "llvmlite-0.43.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3e8d0618cb9bfe40ac38a9633f2493d4d4e9fcc2f438d39a4e854f39cc0f5f98"}, + {file = "llvmlite-0.43.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e0a9a1a39d4bf3517f2af9d23d479b4175ead205c592ceeb8b89af48a327ea57"}, + {file = "llvmlite-0.43.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c1da416ab53e4f7f3bc8d4eeba36d801cc1894b9fbfbf2022b29b6bad34a7df2"}, + {file = "llvmlite-0.43.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:977525a1e5f4059316b183fb4fd34fa858c9eade31f165427a3977c95e3ee749"}, + {file = "llvmlite-0.43.0-cp311-cp311-win_amd64.whl", hash = "sha256:d5bd550001d26450bd90777736c69d68c487d17bf371438f975229b2b8241a91"}, + {file = "llvmlite-0.43.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f99b600aa7f65235a5a05d0b9a9f31150c390f31261f2a0ba678e26823ec38f7"}, + {file = "llvmlite-0.43.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:35d80d61d0cda2d767f72de99450766250560399edc309da16937b93d3b676e7"}, + {file = "llvmlite-0.43.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eccce86bba940bae0d8d48ed925f21dbb813519169246e2ab292b5092aba121f"}, + {file = "llvmlite-0.43.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:df6509e1507ca0760787a199d19439cc887bfd82226f5af746d6977bd9f66844"}, + {file = "llvmlite-0.43.0-cp312-cp312-win_amd64.whl", hash = 
"sha256:7a2872ee80dcf6b5dbdc838763d26554c2a18aa833d31a2635bff16aafefb9c9"}, + {file = "llvmlite-0.43.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9cd2a7376f7b3367019b664c21f0c61766219faa3b03731113ead75107f3b66c"}, + {file = "llvmlite-0.43.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:18e9953c748b105668487b7c81a3e97b046d8abf95c4ddc0cd3c94f4e4651ae8"}, + {file = "llvmlite-0.43.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:74937acd22dc11b33946b67dca7680e6d103d6e90eeaaaf932603bec6fe7b03a"}, + {file = "llvmlite-0.43.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc9efc739cc6ed760f795806f67889923f7274276f0eb45092a1473e40d9b867"}, + {file = "llvmlite-0.43.0-cp39-cp39-win_amd64.whl", hash = "sha256:47e147cdda9037f94b399bf03bfd8a6b6b1f2f90be94a454e3386f006455a9b4"}, + {file = "llvmlite-0.43.0.tar.gz", hash = "sha256:ae2b5b5c3ef67354824fb75517c8db5fbe93bc02cd9671f3c62271626bc041d5"}, ] [[package]] @@ -1614,6 +2413,30 @@ docs = ["sphinx (>=1.6.0)", "sphinx-bootstrap-theme"] flake8 = ["flake8"] tests = ["psutil", "pytest (!=3.3.0)", "pytest-cov"] +[[package]] +name = "markdown-it-py" +version = "3.0.0" +description = "Python port of markdown-it. Markdown parsing, done right!" 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"}, + {file = "markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"}, +] + +[package.dependencies] +mdurl = ">=0.1,<1.0" + +[package.extras] +benchmarking = ["psutil", "pytest", "pytest-benchmark"] +code-style = ["pre-commit (>=3.0,<4.0)"] +compare = ["commonmark (>=0.9,<1.0)", "markdown (>=3.4,<4.0)", "mistletoe (>=1.0,<2.0)", "mistune (>=2.0,<3.0)", "panflute (>=2.3,<3.0)"] +linkify = ["linkify-it-py (>=1,<3)"] +plugins = ["mdit-py-plugins"] +profiling = ["gprof2dot"] +rtd = ["jupyter_sphinx", "mdit-py-plugins", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx_book_theme"] +testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] + [[package]] name = "markupsafe" version = "2.1.5" @@ -1749,6 +2572,94 @@ files = [ [package.dependencies] traitlets = "*" +[[package]] +name = "mdurl" +version = "0.1.2" +description = "Markdown URL utilities" +optional = false +python-versions = ">=3.7" +files = [ + {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"}, + {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, +] + +[[package]] +name = "moto" +version = "5.0.10" +description = "" +optional = false +python-versions = ">=3.8" +files = [ + {file = "moto-5.0.10-py2.py3-none-any.whl", hash = "sha256:9ffae2f64cc8fe95b9a12d63ae7268a7d6bea9993b922905b5abd8197d852cd0"}, + {file = "moto-5.0.10.tar.gz", hash = "sha256:eff37363221c93ea44f95721ae0ddb56f977fe70437a041b6cc641ee90266279"}, +] + +[package.dependencies] +antlr4-python3-runtime = {version = "*", optional = true, markers = "extra == \"all\" or extra == \"server\""} +aws-xray-sdk = {version = ">=0.93,<0.96 || >0.96", 
optional = true, markers = "extra == \"all\" or extra == \"server\""} +boto3 = ">=1.9.201" +botocore = ">=1.14.0" +cfn-lint = {version = ">=0.40.0", optional = true, markers = "extra == \"all\" or extra == \"server\""} +cryptography = ">=3.3.1" +docker = {version = ">=3.0.0", optional = true, markers = "extra == \"all\" or extra == \"server\""} +flask = {version = "<2.2.0 || >2.2.0,<2.2.1 || >2.2.1", optional = true, markers = "extra == \"server\""} +flask-cors = {version = "*", optional = true, markers = "extra == \"server\""} +graphql-core = {version = "*", optional = true, markers = "extra == \"all\" or extra == \"server\""} +Jinja2 = ">=2.10.1" +joserfc = {version = ">=0.9.0", optional = true, markers = "extra == \"all\" or extra == \"server\""} +jsondiff = {version = ">=1.1.2", optional = true, markers = "extra == \"all\" or extra == \"server\""} +jsonpath-ng = {version = "*", optional = true, markers = "extra == \"all\" or extra == \"server\""} +multipart = {version = "*", optional = true, markers = "extra == \"all\""} +openapi-spec-validator = {version = ">=0.5.0", optional = true, markers = "extra == \"all\" or extra == \"server\""} +py-partiql-parser = {version = "0.5.5", optional = true, markers = "extra == \"all\" or extra == \"s3\" or extra == \"server\""} +pyparsing = {version = ">=3.0.7", optional = true, markers = "extra == \"all\" or extra == \"server\""} +python-dateutil = ">=2.1,<3.0.0" +PyYAML = {version = ">=5.1", optional = true, markers = "extra == \"all\" or extra == \"s3\" or extra == \"server\""} +requests = ">=2.5" +responses = ">=0.15.0" +setuptools = {version = "*", optional = true, markers = "extra == \"all\" or extra == \"server\""} +werkzeug = ">=0.5,<2.2.0 || >2.2.0,<2.2.1 || >2.2.1" +xmltodict = "*" + +[package.extras] +all = ["PyYAML (>=5.1)", "antlr4-python3-runtime", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=3.0.0)", "graphql-core", "joserfc (>=0.9.0)", "jsondiff (>=1.1.2)", "jsonpath-ng", "multipart", 
"openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.5.5)", "pyparsing (>=3.0.7)", "setuptools"] +apigateway = ["PyYAML (>=5.1)", "joserfc (>=0.9.0)", "openapi-spec-validator (>=0.5.0)"] +apigatewayv2 = ["PyYAML (>=5.1)", "openapi-spec-validator (>=0.5.0)"] +appsync = ["graphql-core"] +awslambda = ["docker (>=3.0.0)"] +batch = ["docker (>=3.0.0)"] +cloudformation = ["PyYAML (>=5.1)", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=3.0.0)", "graphql-core", "joserfc (>=0.9.0)", "jsondiff (>=1.1.2)", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.5.5)", "pyparsing (>=3.0.7)", "setuptools"] +cognitoidp = ["joserfc (>=0.9.0)"] +dynamodb = ["docker (>=3.0.0)", "py-partiql-parser (==0.5.5)"] +dynamodbstreams = ["docker (>=3.0.0)", "py-partiql-parser (==0.5.5)"] +glue = ["pyparsing (>=3.0.7)"] +iotdata = ["jsondiff (>=1.1.2)"] +proxy = ["PyYAML (>=5.1)", "antlr4-python3-runtime", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=2.5.1)", "graphql-core", "joserfc (>=0.9.0)", "jsondiff (>=1.1.2)", "jsonpath-ng", "multipart", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.5.5)", "pyparsing (>=3.0.7)", "setuptools"] +resourcegroupstaggingapi = ["PyYAML (>=5.1)", "cfn-lint (>=0.40.0)", "docker (>=3.0.0)", "graphql-core", "joserfc (>=0.9.0)", "jsondiff (>=1.1.2)", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.5.5)", "pyparsing (>=3.0.7)"] +s3 = ["PyYAML (>=5.1)", "py-partiql-parser (==0.5.5)"] +s3crc32c = ["PyYAML (>=5.1)", "crc32c", "py-partiql-parser (==0.5.5)"] +server = ["PyYAML (>=5.1)", "antlr4-python3-runtime", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=3.0.0)", "flask (!=2.2.0,!=2.2.1)", "flask-cors", "graphql-core", "joserfc (>=0.9.0)", "jsondiff (>=1.1.2)", "jsonpath-ng", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.5.5)", "pyparsing (>=3.0.7)", "setuptools"] +ssm = ["PyYAML (>=5.1)"] +stepfunctions = ["antlr4-python3-runtime", "jsonpath-ng"] +xray = 
["aws-xray-sdk (>=0.93,!=0.96)", "setuptools"] + +[[package]] +name = "mpmath" +version = "1.3.0" +description = "Python library for arbitrary-precision floating-point arithmetic" +optional = false +python-versions = "*" +files = [ + {file = "mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"}, + {file = "mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f"}, +] + +[package.extras] +develop = ["codecov", "pycodestyle", "pytest (>=4.6)", "pytest-cov", "wheel"] +docs = ["sphinx"] +gmpy = ["gmpy2 (>=2.1.0a4)"] +tests = ["pytest (>=4.6)"] + [[package]] name = "msgpack" version = "1.0.8" @@ -1913,40 +2824,51 @@ files = [ {file = "multidict-6.0.5.tar.gz", hash = "sha256:f7e301075edaf50500f0b341543c41194d8df3ae5caf4702f2095f3ca73dd8da"}, ] +[[package]] +name = "multipart" +version = "0.2.5" +description = "Parser for multipart/form-data." +optional = false +python-versions = ">=3.5" +files = [ + {file = "multipart-0.2.5-py3-none-any.whl", hash = "sha256:96352d67fa1f704e2bcbec9726d7fb316533bd010f0c66639f930fb59b734931"}, + {file = "multipart-0.2.5.tar.gz", hash = "sha256:fa98838d40c967bb19589626a0fb8a5c40c421dda2febe1b0351fcf626e24651"}, +] + [[package]] name = "mypy" -version = "1.10.0" +version = "1.10.1" description = "Optional static typing for Python" optional = false python-versions = ">=3.8" files = [ - {file = "mypy-1.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:da1cbf08fb3b851ab3b9523a884c232774008267b1f83371ace57f412fe308c2"}, - {file = "mypy-1.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:12b6bfc1b1a66095ab413160a6e520e1dc076a28f3e22f7fb25ba3b000b4ef99"}, - {file = "mypy-1.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e36fb078cce9904c7989b9693e41cb9711e0600139ce3970c6ef814b6ebc2b2"}, - {file = "mypy-1.10.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = 
"sha256:2b0695d605ddcd3eb2f736cd8b4e388288c21e7de85001e9f85df9187f2b50f9"}, - {file = "mypy-1.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:cd777b780312ddb135bceb9bc8722a73ec95e042f911cc279e2ec3c667076051"}, - {file = "mypy-1.10.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3be66771aa5c97602f382230165b856c231d1277c511c9a8dd058be4784472e1"}, - {file = "mypy-1.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8b2cbaca148d0754a54d44121b5825ae71868c7592a53b7292eeb0f3fdae95ee"}, - {file = "mypy-1.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ec404a7cbe9fc0e92cb0e67f55ce0c025014e26d33e54d9e506a0f2d07fe5de"}, - {file = "mypy-1.10.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e22e1527dc3d4aa94311d246b59e47f6455b8729f4968765ac1eacf9a4760bc7"}, - {file = "mypy-1.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:a87dbfa85971e8d59c9cc1fcf534efe664d8949e4c0b6b44e8ca548e746a8d53"}, - {file = "mypy-1.10.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a781f6ad4bab20eef8b65174a57e5203f4be627b46291f4589879bf4e257b97b"}, - {file = "mypy-1.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b808e12113505b97d9023b0b5e0c0705a90571c6feefc6f215c1df9381256e30"}, - {file = "mypy-1.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f55583b12156c399dce2df7d16f8a5095291354f1e839c252ec6c0611e86e2e"}, - {file = "mypy-1.10.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4cf18f9d0efa1b16478c4c129eabec36148032575391095f73cae2e722fcf9d5"}, - {file = "mypy-1.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:bc6ac273b23c6b82da3bb25f4136c4fd42665f17f2cd850771cb600bdd2ebeda"}, - {file = "mypy-1.10.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9fd50226364cd2737351c79807775136b0abe084433b55b2e29181a4c3c878c0"}, - {file = "mypy-1.10.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f90cff89eea89273727d8783fef5d4a934be2fdca11b47def50cf5d311aff727"}, - {file = 
"mypy-1.10.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fcfc70599efde5c67862a07a1aaf50e55bce629ace26bb19dc17cece5dd31ca4"}, - {file = "mypy-1.10.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:075cbf81f3e134eadaf247de187bd604748171d6b79736fa9b6c9685b4083061"}, - {file = "mypy-1.10.0-cp38-cp38-win_amd64.whl", hash = "sha256:3f298531bca95ff615b6e9f2fc0333aae27fa48052903a0ac90215021cdcfa4f"}, - {file = "mypy-1.10.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:fa7ef5244615a2523b56c034becde4e9e3f9b034854c93639adb667ec9ec2976"}, - {file = "mypy-1.10.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3236a4c8f535a0631f85f5fcdffba71c7feeef76a6002fcba7c1a8e57c8be1ec"}, - {file = "mypy-1.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a2b5cdbb5dd35aa08ea9114436e0d79aceb2f38e32c21684dcf8e24e1e92821"}, - {file = "mypy-1.10.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:92f93b21c0fe73dc00abf91022234c79d793318b8a96faac147cd579c1671746"}, - {file = "mypy-1.10.0-cp39-cp39-win_amd64.whl", hash = "sha256:28d0e038361b45f099cc086d9dd99c15ff14d0188f44ac883010e172ce86c38a"}, - {file = "mypy-1.10.0-py3-none-any.whl", hash = "sha256:f8c083976eb530019175aabadb60921e73b4f45736760826aa1689dda8208aee"}, - {file = "mypy-1.10.0.tar.gz", hash = "sha256:3d087fcbec056c4ee34974da493a826ce316947485cef3901f511848e687c131"}, + {file = "mypy-1.10.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e36f229acfe250dc660790840916eb49726c928e8ce10fbdf90715090fe4ae02"}, + {file = "mypy-1.10.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:51a46974340baaa4145363b9e051812a2446cf583dfaeba124af966fa44593f7"}, + {file = "mypy-1.10.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:901c89c2d67bba57aaaca91ccdb659aa3a312de67f23b9dfb059727cce2e2e0a"}, + {file = "mypy-1.10.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0cd62192a4a32b77ceb31272d9e74d23cd88c8060c34d1d3622db3267679a5d9"}, + {file = 
"mypy-1.10.1-cp310-cp310-win_amd64.whl", hash = "sha256:a2cbc68cb9e943ac0814c13e2452d2046c2f2b23ff0278e26599224cf164e78d"}, + {file = "mypy-1.10.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:bd6f629b67bb43dc0d9211ee98b96d8dabc97b1ad38b9b25f5e4c4d7569a0c6a"}, + {file = "mypy-1.10.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a1bbb3a6f5ff319d2b9d40b4080d46cd639abe3516d5a62c070cf0114a457d84"}, + {file = "mypy-1.10.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8edd4e9bbbc9d7b79502eb9592cab808585516ae1bcc1446eb9122656c6066f"}, + {file = "mypy-1.10.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6166a88b15f1759f94a46fa474c7b1b05d134b1b61fca627dd7335454cc9aa6b"}, + {file = "mypy-1.10.1-cp311-cp311-win_amd64.whl", hash = "sha256:5bb9cd11c01c8606a9d0b83ffa91d0b236a0e91bc4126d9ba9ce62906ada868e"}, + {file = "mypy-1.10.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d8681909f7b44d0b7b86e653ca152d6dff0eb5eb41694e163c6092124f8246d7"}, + {file = "mypy-1.10.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:378c03f53f10bbdd55ca94e46ec3ba255279706a6aacaecac52ad248f98205d3"}, + {file = "mypy-1.10.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6bacf8f3a3d7d849f40ca6caea5c055122efe70e81480c8328ad29c55c69e93e"}, + {file = "mypy-1.10.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:701b5f71413f1e9855566a34d6e9d12624e9e0a8818a5704d74d6b0402e66c04"}, + {file = "mypy-1.10.1-cp312-cp312-win_amd64.whl", hash = "sha256:3c4c2992f6ea46ff7fce0072642cfb62af7a2484efe69017ed8b095f7b39ef31"}, + {file = "mypy-1.10.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:604282c886497645ffb87b8f35a57ec773a4a2721161e709a4422c1636ddde5c"}, + {file = "mypy-1.10.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:37fd87cab83f09842653f08de066ee68f1182b9b5282e4634cdb4b407266bade"}, + {file = "mypy-1.10.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:8addf6313777dbb92e9564c5d32ec122bf2c6c39d683ea64de6a1fd98b90fe37"}, + {file = "mypy-1.10.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5cc3ca0a244eb9a5249c7c583ad9a7e881aa5d7b73c35652296ddcdb33b2b9c7"}, + {file = "mypy-1.10.1-cp38-cp38-win_amd64.whl", hash = "sha256:1b3a2ffce52cc4dbaeee4df762f20a2905aa171ef157b82192f2e2f368eec05d"}, + {file = "mypy-1.10.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:fe85ed6836165d52ae8b88f99527d3d1b2362e0cb90b005409b8bed90e9059b3"}, + {file = "mypy-1.10.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c2ae450d60d7d020d67ab440c6e3fae375809988119817214440033f26ddf7bf"}, + {file = "mypy-1.10.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6be84c06e6abd72f960ba9a71561c14137a583093ffcf9bbfaf5e613d63fa531"}, + {file = "mypy-1.10.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:2189ff1e39db399f08205e22a797383613ce1cb0cb3b13d8bcf0170e45b96cc3"}, + {file = "mypy-1.10.1-cp39-cp39-win_amd64.whl", hash = "sha256:97a131ee36ac37ce9581f4220311247ab6cba896b4395b9c87af0675a13a755f"}, + {file = "mypy-1.10.1-py3-none-any.whl", hash = "sha256:71d8ac0b906354ebda8ef1673e5fde785936ac1f29ff6987c7483cfbd5a4235a"}, + {file = "mypy-1.10.1.tar.gz", hash = "sha256:1f8f492d7db9e3593ef42d4f115f04e556130f2819ad33ab84551403e97dd4c0"}, ] [package.dependencies] @@ -2036,6 +2958,24 @@ numpy = "*" [package.extras] tests = ["Cython", "packaging", "pytest"] +[[package]] +name = "networkx" +version = "3.3" +description = "Python package for creating and manipulating graphs and networks" +optional = false +python-versions = ">=3.10" +files = [ + {file = "networkx-3.3-py3-none-any.whl", hash = "sha256:28575580c6ebdaf4505b22c6256a2b9de86b316dc63ba9e93abde3d78dfdbcf2"}, + {file = "networkx-3.3.tar.gz", hash = "sha256:0c127d8b2f4865f59ae9cb8aafcd60b5c70f3241ebd66f7defad7c4ab90126c9"}, +] + +[package.extras] +default = ["matplotlib (>=3.6)", "numpy (>=1.23)", "pandas (>=1.4)", "scipy (>=1.9,!=1.11.0,!=1.11.1)"] +developer 
= ["changelist (==0.5)", "mypy (>=1.1)", "pre-commit (>=3.2)", "rtoml"] +doc = ["myst-nb (>=1.0)", "numpydoc (>=1.7)", "pillow (>=9.4)", "pydata-sphinx-theme (>=0.14)", "sphinx (>=7)", "sphinx-gallery (>=0.14)", "texext (>=0.6.7)"] +extra = ["lxml (>=4.6)", "pydot (>=2.0)", "pygraphviz (>=1.12)", "sympy (>=1.10)"] +test = ["pytest (>=7.2)", "pytest-cov (>=4.0)"] + [[package]] name = "nodeenv" version = "1.9.1" @@ -2049,37 +2989,37 @@ files = [ [[package]] name = "numba" -version = "0.59.1" +version = "0.60.0" description = "compiling Python code using LLVM" optional = false python-versions = ">=3.9" files = [ - {file = "numba-0.59.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:97385a7f12212c4f4bc28f648720a92514bee79d7063e40ef66c2d30600fd18e"}, - {file = "numba-0.59.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0b77aecf52040de2a1eb1d7e314497b9e56fba17466c80b457b971a25bb1576d"}, - {file = "numba-0.59.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:3476a4f641bfd58f35ead42f4dcaf5f132569c4647c6f1360ccf18ee4cda3990"}, - {file = "numba-0.59.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:525ef3f820931bdae95ee5379c670d5c97289c6520726bc6937a4a7d4230ba24"}, - {file = "numba-0.59.1-cp310-cp310-win_amd64.whl", hash = "sha256:990e395e44d192a12105eca3083b61307db7da10e093972ca285c85bef0963d6"}, - {file = "numba-0.59.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:43727e7ad20b3ec23ee4fc642f5b61845c71f75dd2825b3c234390c6d8d64051"}, - {file = "numba-0.59.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:411df625372c77959570050e861981e9d196cc1da9aa62c3d6a836b5cc338966"}, - {file = "numba-0.59.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2801003caa263d1e8497fb84829a7ecfb61738a95f62bc05693fcf1733e978e4"}, - {file = "numba-0.59.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:dd2842fac03be4e5324ebbbd4d2d0c8c0fc6e0df75c09477dd45b288a0777389"}, - {file = "numba-0.59.1-cp311-cp311-win_amd64.whl", hash = "sha256:0594b3dfb369fada1f8bb2e3045cd6c61a564c62e50cf1f86b4666bc721b3450"}, - {file = "numba-0.59.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:1cce206a3b92836cdf26ef39d3a3242fec25e07f020cc4feec4c4a865e340569"}, - {file = "numba-0.59.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8c8b4477763cb1fbd86a3be7050500229417bf60867c93e131fd2626edb02238"}, - {file = "numba-0.59.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7d80bce4ef7e65bf895c29e3889ca75a29ee01da80266a01d34815918e365835"}, - {file = "numba-0.59.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f7ad1d217773e89a9845886401eaaab0a156a90aa2f179fdc125261fd1105096"}, - {file = "numba-0.59.1-cp312-cp312-win_amd64.whl", hash = "sha256:5bf68f4d69dd3a9f26a9b23548fa23e3bcb9042e2935257b471d2a8d3c424b7f"}, - {file = "numba-0.59.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4e0318ae729de6e5dbe64c75ead1a95eb01fabfe0e2ebed81ebf0344d32db0ae"}, - {file = "numba-0.59.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0f68589740a8c38bb7dc1b938b55d1145244c8353078eea23895d4f82c8b9ec1"}, - {file = "numba-0.59.1-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:649913a3758891c77c32e2d2a3bcbedf4a69f5fea276d11f9119677c45a422e8"}, - {file = "numba-0.59.1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9712808e4545270291d76b9a264839ac878c5eb7d8b6e02c970dc0ac29bc8187"}, - {file = "numba-0.59.1-cp39-cp39-win_amd64.whl", hash = "sha256:8d51ccd7008a83105ad6a0082b6a2b70f1142dc7cfd76deb8c5a862367eb8c86"}, - {file = "numba-0.59.1.tar.gz", hash = "sha256:76f69132b96028d2774ed20415e8c528a34e3299a40581bae178f0994a2f370b"}, -] - -[package.dependencies] -llvmlite = "==0.42.*" -numpy = ">=1.22,<1.27" + {file = "numba-0.60.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = 
"sha256:5d761de835cd38fb400d2c26bb103a2726f548dc30368853121d66201672e651"}, + {file = "numba-0.60.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:159e618ef213fba758837f9837fb402bbe65326e60ba0633dbe6c7f274d42c1b"}, + {file = "numba-0.60.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1527dc578b95c7c4ff248792ec33d097ba6bef9eda466c948b68dfc995c25781"}, + {file = "numba-0.60.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fe0b28abb8d70f8160798f4de9d486143200f34458d34c4a214114e445d7124e"}, + {file = "numba-0.60.0-cp310-cp310-win_amd64.whl", hash = "sha256:19407ced081d7e2e4b8d8c36aa57b7452e0283871c296e12d798852bc7d7f198"}, + {file = "numba-0.60.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a17b70fc9e380ee29c42717e8cc0bfaa5556c416d94f9aa96ba13acb41bdece8"}, + {file = "numba-0.60.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3fb02b344a2a80efa6f677aa5c40cd5dd452e1b35f8d1c2af0dfd9ada9978e4b"}, + {file = "numba-0.60.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5f4fde652ea604ea3c86508a3fb31556a6157b2c76c8b51b1d45eb40c8598703"}, + {file = "numba-0.60.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4142d7ac0210cc86432b818338a2bc368dc773a2f5cf1e32ff7c5b378bd63ee8"}, + {file = "numba-0.60.0-cp311-cp311-win_amd64.whl", hash = "sha256:cac02c041e9b5bc8cf8f2034ff6f0dbafccd1ae9590dc146b3a02a45e53af4e2"}, + {file = "numba-0.60.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d7da4098db31182fc5ffe4bc42c6f24cd7d1cb8a14b59fd755bfee32e34b8404"}, + {file = "numba-0.60.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:38d6ea4c1f56417076ecf8fc327c831ae793282e0ff51080c5094cb726507b1c"}, + {file = "numba-0.60.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:62908d29fb6a3229c242e981ca27e32a6e606cc253fc9e8faeb0e48760de241e"}, + {file = "numba-0.60.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash 
= "sha256:0ebaa91538e996f708f1ab30ef4d3ddc344b64b5227b67a57aa74f401bb68b9d"}, + {file = "numba-0.60.0-cp312-cp312-win_amd64.whl", hash = "sha256:f75262e8fe7fa96db1dca93d53a194a38c46da28b112b8a4aca168f0df860347"}, + {file = "numba-0.60.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:01ef4cd7d83abe087d644eaa3d95831b777aa21d441a23703d649e06b8e06b74"}, + {file = "numba-0.60.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:819a3dfd4630d95fd574036f99e47212a1af41cbcb019bf8afac63ff56834449"}, + {file = "numba-0.60.0-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0b983bd6ad82fe868493012487f34eae8bf7dd94654951404114f23c3466d34b"}, + {file = "numba-0.60.0-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c151748cd269ddeab66334bd754817ffc0cabd9433acb0f551697e5151917d25"}, + {file = "numba-0.60.0-cp39-cp39-win_amd64.whl", hash = "sha256:3031547a015710140e8c87226b4cfe927cac199835e5bf7d4fe5cb64e814e3ab"}, + {file = "numba-0.60.0.tar.gz", hash = "sha256:5df6158e5584eece5fc83294b949fd30b9f1125df7708862205217e068aabf16"}, +] + +[package.dependencies] +llvmlite = "==0.43.*" +numpy = ">=1.22,<2.1" [[package]] name = "numbagg" @@ -2202,6 +3142,39 @@ numpy = "*" dev = ["numba", "pandas", "pytest"] fast = ["numba"] +[[package]] +name = "openapi-schema-validator" +version = "0.6.2" +description = "OpenAPI schema validation for Python" +optional = false +python-versions = ">=3.8.0,<4.0.0" +files = [ + {file = "openapi_schema_validator-0.6.2-py3-none-any.whl", hash = "sha256:c4887c1347c669eb7cded9090f4438b710845cd0f90d1fb9e1b3303fb37339f8"}, + {file = "openapi_schema_validator-0.6.2.tar.gz", hash = "sha256:11a95c9c9017912964e3e5f2545a5b11c3814880681fcacfb73b1759bb4f2804"}, +] + +[package.dependencies] +jsonschema = ">=4.19.1,<5.0.0" +jsonschema-specifications = ">=2023.5.2,<2024.0.0" +rfc3339-validator = "*" + +[[package]] +name = "openapi-spec-validator" +version = "0.7.1" +description = "OpenAPI 2.0 (aka Swagger) and OpenAPI 3 
spec validator" +optional = false +python-versions = ">=3.8.0,<4.0.0" +files = [ + {file = "openapi_spec_validator-0.7.1-py3-none-any.whl", hash = "sha256:3c81825043f24ccbcd2f4b149b11e8231abce5ba84f37065e14ec947d8f4e959"}, + {file = "openapi_spec_validator-0.7.1.tar.gz", hash = "sha256:8577b85a8268685da6f8aa30990b83b7960d4d1117e901d451b5d572605e5ec7"}, +] + +[package.dependencies] +jsonschema = ">=4.18.0,<5.0.0" +jsonschema-path = ">=0.3.1,<0.4.0" +lazy-object-proxy = ">=1.7.1,<2.0.0" +openapi-schema-validator = ">=0.6.0,<0.7.0" + [[package]] name = "opt-einsum" version = "3.3.0" @@ -2222,13 +3195,13 @@ tests = ["pytest", "pytest-cov", "pytest-pep8"] [[package]] name = "packaging" -version = "24.0" +version = "24.1" description = "Core utilities for Python packages" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "packaging-24.0-py3-none-any.whl", hash = "sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5"}, - {file = "packaging-24.0.tar.gz", hash = "sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9"}, + {file = "packaging-24.1-py3-none-any.whl", hash = "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124"}, + {file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"}, ] [[package]] @@ -2300,6 +3273,27 @@ sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-d test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"] xml = ["lxml (>=4.9.2)"] +[[package]] +name = "paramiko" +version = "3.4.0" +description = "SSH2 protocol library" +optional = false +python-versions = ">=3.6" +files = [ + {file = "paramiko-3.4.0-py3-none-any.whl", hash = "sha256:43f0b51115a896f9c00f59618023484cb3a14b98bbceab43394a39c6739b7ee7"}, + {file = "paramiko-3.4.0.tar.gz", hash = "sha256:aac08f26a31dc4dffd92821527d1682d99d52f9ef6851968114a8728f3c274d3"}, +] + +[package.dependencies] 
+bcrypt = ">=3.2" +cryptography = ">=3.3" +pynacl = ">=1.5" + +[package.extras] +all = ["gssapi (>=1.4.1)", "invoke (>=2.0)", "pyasn1 (>=0.1.7)", "pywin32 (>=2.1.8)"] +gssapi = ["gssapi (>=1.4.1)", "pyasn1 (>=0.1.7)", "pywin32 (>=2.1.8)"] +invoke = ["invoke (>=2.0)"] + [[package]] name = "parso" version = "0.8.4" @@ -2333,6 +3327,17 @@ toolz = "*" [package.extras] complete = ["blosc", "numpy (>=1.20.0)", "pandas (>=1.3)", "pyzmq"] +[[package]] +name = "pathable" +version = "0.4.3" +description = "Object-oriented paths" +optional = false +python-versions = ">=3.7.0,<4.0.0" +files = [ + {file = "pathable-0.4.3-py3-none-any.whl", hash = "sha256:cdd7b1f9d7d5c8b8d3315dbf5a86b2596053ae845f056f57d97c0eefff84da14"}, + {file = "pathable-0.4.3.tar.gz", hash = "sha256:5c869d315be50776cc8a993f3af43e0c60dc01506b399643f919034ebf4cdcab"}, +] + [[package]] name = "pexpect" version = "4.9.0" @@ -2354,95 +3359,136 @@ description = "Tiny 'shelve'-like database with concurrency support" optional = false python-versions = "*" files = [ - {file = "pickleshare-0.7.5-py2.py3-none-any.whl", hash = "sha256:9649af414d74d4df115d5d718f82acb59c9d418196b7b4290ed47a12ce62df56"}, - {file = "pickleshare-0.7.5.tar.gz", hash = "sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca"}, + {file = "pickleshare-0.7.5-py2.py3-none-any.whl", hash = "sha256:9649af414d74d4df115d5d718f82acb59c9d418196b7b4290ed47a12ce62df56"}, + {file = "pickleshare-0.7.5.tar.gz", hash = "sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca"}, +] + +[[package]] +name = "pillow" +version = "10.4.0" +description = "Python Imaging Library (Fork)" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pillow-10.4.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:4d9667937cfa347525b319ae34375c37b9ee6b525440f3ef48542fcf66f2731e"}, + {file = "pillow-10.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:543f3dc61c18dafb755773efc89aae60d06b6596a63914107f75459cf984164d"}, + {file 
= "pillow-10.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7928ecbf1ece13956b95d9cbcfc77137652b02763ba384d9ab508099a2eca856"}, + {file = "pillow-10.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4d49b85c4348ea0b31ea63bc75a9f3857869174e2bf17e7aba02945cd218e6f"}, + {file = "pillow-10.4.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:6c762a5b0997f5659a5ef2266abc1d8851ad7749ad9a6a5506eb23d314e4f46b"}, + {file = "pillow-10.4.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:a985e028fc183bf12a77a8bbf36318db4238a3ded7fa9df1b9a133f1cb79f8fc"}, + {file = "pillow-10.4.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:812f7342b0eee081eaec84d91423d1b4650bb9828eb53d8511bcef8ce5aecf1e"}, + {file = "pillow-10.4.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ac1452d2fbe4978c2eec89fb5a23b8387aba707ac72810d9490118817d9c0b46"}, + {file = "pillow-10.4.0-cp310-cp310-win32.whl", hash = "sha256:bcd5e41a859bf2e84fdc42f4edb7d9aba0a13d29a2abadccafad99de3feff984"}, + {file = "pillow-10.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:ecd85a8d3e79cd7158dec1c9e5808e821feea088e2f69a974db5edf84dc53141"}, + {file = "pillow-10.4.0-cp310-cp310-win_arm64.whl", hash = "sha256:ff337c552345e95702c5fde3158acb0625111017d0e5f24bf3acdb9cc16b90d1"}, + {file = "pillow-10.4.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:0a9ec697746f268507404647e531e92889890a087e03681a3606d9b920fbee3c"}, + {file = "pillow-10.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:dfe91cb65544a1321e631e696759491ae04a2ea11d36715eca01ce07284738be"}, + {file = "pillow-10.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5dc6761a6efc781e6a1544206f22c80c3af4c8cf461206d46a1e6006e4429ff3"}, + {file = "pillow-10.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e84b6cc6a4a3d76c153a6b19270b3526a5a8ed6b09501d3af891daa2a9de7d6"}, + {file = 
"pillow-10.4.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:bbc527b519bd3aa9d7f429d152fea69f9ad37c95f0b02aebddff592688998abe"}, + {file = "pillow-10.4.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:76a911dfe51a36041f2e756b00f96ed84677cdeb75d25c767f296c1c1eda1319"}, + {file = "pillow-10.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:59291fb29317122398786c2d44427bbd1a6d7ff54017075b22be9d21aa59bd8d"}, + {file = "pillow-10.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:416d3a5d0e8cfe4f27f574362435bc9bae57f679a7158e0096ad2beb427b8696"}, + {file = "pillow-10.4.0-cp311-cp311-win32.whl", hash = "sha256:7086cc1d5eebb91ad24ded9f58bec6c688e9f0ed7eb3dbbf1e4800280a896496"}, + {file = "pillow-10.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:cbed61494057c0f83b83eb3a310f0bf774b09513307c434d4366ed64f4128a91"}, + {file = "pillow-10.4.0-cp311-cp311-win_arm64.whl", hash = "sha256:f5f0c3e969c8f12dd2bb7e0b15d5c468b51e5017e01e2e867335c81903046a22"}, + {file = "pillow-10.4.0-cp312-cp312-macosx_10_10_x86_64.whl", hash = "sha256:673655af3eadf4df6b5457033f086e90299fdd7a47983a13827acf7459c15d94"}, + {file = "pillow-10.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:866b6942a92f56300012f5fbac71f2d610312ee65e22f1aa2609e491284e5597"}, + {file = "pillow-10.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:29dbdc4207642ea6aad70fbde1a9338753d33fb23ed6956e706936706f52dd80"}, + {file = "pillow-10.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf2342ac639c4cf38799a44950bbc2dfcb685f052b9e262f446482afaf4bffca"}, + {file = "pillow-10.4.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:f5b92f4d70791b4a67157321c4e8225d60b119c5cc9aee8ecf153aace4aad4ef"}, + {file = "pillow-10.4.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:86dcb5a1eb778d8b25659d5e4341269e8590ad6b4e8b44d9f4b07f8d136c414a"}, + {file = "pillow-10.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:780c072c2e11c9b2c7ca37f9a2ee8ba66f44367ac3e5c7832afcfe5104fd6d1b"}, + {file = "pillow-10.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:37fb69d905be665f68f28a8bba3c6d3223c8efe1edf14cc4cfa06c241f8c81d9"}, + {file = "pillow-10.4.0-cp312-cp312-win32.whl", hash = "sha256:7dfecdbad5c301d7b5bde160150b4db4c659cee2b69589705b6f8a0c509d9f42"}, + {file = "pillow-10.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:1d846aea995ad352d4bdcc847535bd56e0fd88d36829d2c90be880ef1ee4668a"}, + {file = "pillow-10.4.0-cp312-cp312-win_arm64.whl", hash = "sha256:e553cad5179a66ba15bb18b353a19020e73a7921296a7979c4a2b7f6a5cd57f9"}, + {file = "pillow-10.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8bc1a764ed8c957a2e9cacf97c8b2b053b70307cf2996aafd70e91a082e70df3"}, + {file = "pillow-10.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:6209bb41dc692ddfee4942517c19ee81b86c864b626dbfca272ec0f7cff5d9fb"}, + {file = "pillow-10.4.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bee197b30783295d2eb680b311af15a20a8b24024a19c3a26431ff83eb8d1f70"}, + {file = "pillow-10.4.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ef61f5dd14c300786318482456481463b9d6b91ebe5ef12f405afbba77ed0be"}, + {file = "pillow-10.4.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:297e388da6e248c98bc4a02e018966af0c5f92dfacf5a5ca22fa01cb3179bca0"}, + {file = "pillow-10.4.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:e4db64794ccdf6cb83a59d73405f63adbe2a1887012e308828596100a0b2f6cc"}, + {file = "pillow-10.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bd2880a07482090a3bcb01f4265f1936a903d70bc740bfcb1fd4e8a2ffe5cf5a"}, + {file = "pillow-10.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4b35b21b819ac1dbd1233317adeecd63495f6babf21b7b2512d244ff6c6ce309"}, + {file = "pillow-10.4.0-cp313-cp313-win32.whl", hash = "sha256:551d3fd6e9dc15e4c1eb6fc4ba2b39c0c7933fa113b220057a34f4bb3268a060"}, + {file = 
"pillow-10.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:030abdbe43ee02e0de642aee345efa443740aa4d828bfe8e2eb11922ea6a21ea"}, + {file = "pillow-10.4.0-cp313-cp313-win_arm64.whl", hash = "sha256:5b001114dd152cfd6b23befeb28d7aee43553e2402c9f159807bf55f33af8a8d"}, + {file = "pillow-10.4.0-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:8d4d5063501b6dd4024b8ac2f04962d661222d120381272deea52e3fc52d3736"}, + {file = "pillow-10.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7c1ee6f42250df403c5f103cbd2768a28fe1a0ea1f0f03fe151c8741e1469c8b"}, + {file = "pillow-10.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b15e02e9bb4c21e39876698abf233c8c579127986f8207200bc8a8f6bb27acf2"}, + {file = "pillow-10.4.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a8d4bade9952ea9a77d0c3e49cbd8b2890a399422258a77f357b9cc9be8d680"}, + {file = "pillow-10.4.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:43efea75eb06b95d1631cb784aa40156177bf9dd5b4b03ff38979e048258bc6b"}, + {file = "pillow-10.4.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:950be4d8ba92aca4b2bb0741285a46bfae3ca699ef913ec8416c1b78eadd64cd"}, + {file = "pillow-10.4.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:d7480af14364494365e89d6fddc510a13e5a2c3584cb19ef65415ca57252fb84"}, + {file = "pillow-10.4.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:73664fe514b34c8f02452ffb73b7a92c6774e39a647087f83d67f010eb9a0cf0"}, + {file = "pillow-10.4.0-cp38-cp38-win32.whl", hash = "sha256:e88d5e6ad0d026fba7bdab8c3f225a69f063f116462c49892b0149e21b6c0a0e"}, + {file = "pillow-10.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:5161eef006d335e46895297f642341111945e2c1c899eb406882a6c61a4357ab"}, + {file = "pillow-10.4.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:0ae24a547e8b711ccaaf99c9ae3cd975470e1a30caa80a6aaee9a2f19c05701d"}, + {file = "pillow-10.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = 
"sha256:298478fe4f77a4408895605f3482b6cc6222c018b2ce565c2b6b9c354ac3229b"}, + {file = "pillow-10.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:134ace6dc392116566980ee7436477d844520a26a4b1bd4053f6f47d096997fd"}, + {file = "pillow-10.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:930044bb7679ab003b14023138b50181899da3f25de50e9dbee23b61b4de2126"}, + {file = "pillow-10.4.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:c76e5786951e72ed3686e122d14c5d7012f16c8303a674d18cdcd6d89557fc5b"}, + {file = "pillow-10.4.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:b2724fdb354a868ddf9a880cb84d102da914e99119211ef7ecbdc613b8c96b3c"}, + {file = "pillow-10.4.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:dbc6ae66518ab3c5847659e9988c3b60dc94ffb48ef9168656e0019a93dbf8a1"}, + {file = "pillow-10.4.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:06b2f7898047ae93fad74467ec3d28fe84f7831370e3c258afa533f81ef7f3df"}, + {file = "pillow-10.4.0-cp39-cp39-win32.whl", hash = "sha256:7970285ab628a3779aecc35823296a7869f889b8329c16ad5a71e4901a3dc4ef"}, + {file = "pillow-10.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:961a7293b2457b405967af9c77dcaa43cc1a8cd50d23c532e62d48ab6cdd56f5"}, + {file = "pillow-10.4.0-cp39-cp39-win_arm64.whl", hash = "sha256:32cda9e3d601a52baccb2856b8ea1fc213c90b340c542dcef77140dfa3278a9e"}, + {file = "pillow-10.4.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:5b4815f2e65b30f5fbae9dfffa8636d992d49705723fe86a3661806e069352d4"}, + {file = "pillow-10.4.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:8f0aef4ef59694b12cadee839e2ba6afeab89c0f39a3adc02ed51d109117b8da"}, + {file = "pillow-10.4.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9f4727572e2918acaa9077c919cbbeb73bd2b3ebcfe033b72f858fc9fbef0026"}, + {file = "pillow-10.4.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:ff25afb18123cea58a591ea0244b92eb1e61a1fd497bf6d6384f09bc3262ec3e"}, + {file = "pillow-10.4.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:dc3e2db6ba09ffd7d02ae9141cfa0ae23393ee7687248d46a7507b75d610f4f5"}, + {file = "pillow-10.4.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:02a2be69f9c9b8c1e97cf2713e789d4e398c751ecfd9967c18d0ce304efbf885"}, + {file = "pillow-10.4.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:0755ffd4a0c6f267cccbae2e9903d95477ca2f77c4fcf3a3a09570001856c8a5"}, + {file = "pillow-10.4.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:a02364621fe369e06200d4a16558e056fe2805d3468350df3aef21e00d26214b"}, + {file = "pillow-10.4.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:1b5dea9831a90e9d0721ec417a80d4cbd7022093ac38a568db2dd78363b00908"}, + {file = "pillow-10.4.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b885f89040bb8c4a1573566bbb2f44f5c505ef6e74cec7ab9068c900047f04b"}, + {file = "pillow-10.4.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87dd88ded2e6d74d31e1e0a99a726a6765cda32d00ba72dc37f0651f306daaa8"}, + {file = "pillow-10.4.0-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:2db98790afc70118bd0255c2eeb465e9767ecf1f3c25f9a1abb8ffc8cfd1fe0a"}, + {file = "pillow-10.4.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:f7baece4ce06bade126fb84b8af1c33439a76d8a6fd818970215e0560ca28c27"}, + {file = "pillow-10.4.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:cfdd747216947628af7b259d274771d84db2268ca062dd5faf373639d00113a3"}, + {file = "pillow-10.4.0.tar.gz", hash = "sha256:166c1cd4d24309b30d61f79f4a9114b7b2313d7450912277855ff5dfd7cd4a06"}, +] + +[package.extras] +docs = ["furo", "olefile", "sphinx (>=7.3)", "sphinx-copybutton", "sphinx-inline-tabs", "sphinxext-opengraph"] +fpx = ["olefile"] +mic = ["olefile"] +tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", 
"packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"] +typing = ["typing-extensions"] +xmp = ["defusedxml"] + +[[package]] +name = "pip" +version = "24.1.1" +description = "The PyPA recommended tool for installing Python packages." +optional = false +python-versions = ">=3.8" +files = [ + {file = "pip-24.1.1-py3-none-any.whl", hash = "sha256:efca15145a95e95c00608afeab66311d40bfb73bb2266a855befd705e6bb15a0"}, + {file = "pip-24.1.1.tar.gz", hash = "sha256:5aa64f65e1952733ee0a9a9b1f52496ebdb3f3077cc46f80a16d983b58d1180a"}, ] [[package]] -name = "pillow" -version = "10.3.0" -description = "Python Imaging Library (Fork)" +name = "pip-requirements-parser" +version = "32.0.1" +description = "pip requirements parser - a mostly correct pip requirements parsing library because it uses pip's own code." optional = false -python-versions = ">=3.8" +python-versions = ">=3.6.0" files = [ - {file = "pillow-10.3.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:90b9e29824800e90c84e4022dd5cc16eb2d9605ee13f05d47641eb183cd73d45"}, - {file = "pillow-10.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a2c405445c79c3f5a124573a051062300936b0281fee57637e706453e452746c"}, - {file = "pillow-10.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:78618cdbccaa74d3f88d0ad6cb8ac3007f1a6fa5c6f19af64b55ca170bfa1edf"}, - {file = "pillow-10.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:261ddb7ca91fcf71757979534fb4c128448b5b4c55cb6152d280312062f69599"}, - {file = "pillow-10.3.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:ce49c67f4ea0609933d01c0731b34b8695a7a748d6c8d186f95e7d085d2fe475"}, - {file = "pillow-10.3.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:b14f16f94cbc61215115b9b1236f9c18403c15dd3c52cf629072afa9d54c1cbf"}, - {file = "pillow-10.3.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d33891be6df59d93df4d846640f0e46f1a807339f09e79a8040bc887bdcd7ed3"}, - {file = 
"pillow-10.3.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b50811d664d392f02f7761621303eba9d1b056fb1868c8cdf4231279645c25f5"}, - {file = "pillow-10.3.0-cp310-cp310-win32.whl", hash = "sha256:ca2870d5d10d8726a27396d3ca4cf7976cec0f3cb706debe88e3a5bd4610f7d2"}, - {file = "pillow-10.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:f0d0591a0aeaefdaf9a5e545e7485f89910c977087e7de2b6c388aec32011e9f"}, - {file = "pillow-10.3.0-cp310-cp310-win_arm64.whl", hash = "sha256:ccce24b7ad89adb5a1e34a6ba96ac2530046763912806ad4c247356a8f33a67b"}, - {file = "pillow-10.3.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:5f77cf66e96ae734717d341c145c5949c63180842a545c47a0ce7ae52ca83795"}, - {file = "pillow-10.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e4b878386c4bf293578b48fc570b84ecfe477d3b77ba39a6e87150af77f40c57"}, - {file = "pillow-10.3.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fdcbb4068117dfd9ce0138d068ac512843c52295ed996ae6dd1faf537b6dbc27"}, - {file = "pillow-10.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9797a6c8fe16f25749b371c02e2ade0efb51155e767a971c61734b1bf6293994"}, - {file = "pillow-10.3.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:9e91179a242bbc99be65e139e30690e081fe6cb91a8e77faf4c409653de39451"}, - {file = "pillow-10.3.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:1b87bd9d81d179bd8ab871603bd80d8645729939f90b71e62914e816a76fc6bd"}, - {file = "pillow-10.3.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:81d09caa7b27ef4e61cb7d8fbf1714f5aec1c6b6c5270ee53504981e6e9121ad"}, - {file = "pillow-10.3.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:048ad577748b9fa4a99a0548c64f2cb8d672d5bf2e643a739ac8faff1164238c"}, - {file = "pillow-10.3.0-cp311-cp311-win32.whl", hash = "sha256:7161ec49ef0800947dc5570f86568a7bb36fa97dd09e9827dc02b718c5643f09"}, - {file = "pillow-10.3.0-cp311-cp311-win_amd64.whl", hash = 
"sha256:8eb0908e954d093b02a543dc963984d6e99ad2b5e36503d8a0aaf040505f747d"}, - {file = "pillow-10.3.0-cp311-cp311-win_arm64.whl", hash = "sha256:4e6f7d1c414191c1199f8996d3f2282b9ebea0945693fb67392c75a3a320941f"}, - {file = "pillow-10.3.0-cp312-cp312-macosx_10_10_x86_64.whl", hash = "sha256:e46f38133e5a060d46bd630faa4d9fa0202377495df1f068a8299fd78c84de84"}, - {file = "pillow-10.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:50b8eae8f7334ec826d6eeffaeeb00e36b5e24aa0b9df322c247539714c6df19"}, - {file = "pillow-10.3.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9d3bea1c75f8c53ee4d505c3e67d8c158ad4df0d83170605b50b64025917f338"}, - {file = "pillow-10.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:19aeb96d43902f0a783946a0a87dbdad5c84c936025b8419da0a0cd7724356b1"}, - {file = "pillow-10.3.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:74d28c17412d9caa1066f7a31df8403ec23d5268ba46cd0ad2c50fb82ae40462"}, - {file = "pillow-10.3.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:ff61bfd9253c3915e6d41c651d5f962da23eda633cf02262990094a18a55371a"}, - {file = "pillow-10.3.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d886f5d353333b4771d21267c7ecc75b710f1a73d72d03ca06df49b09015a9ef"}, - {file = "pillow-10.3.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4b5ec25d8b17217d635f8935dbc1b9aa5907962fae29dff220f2659487891cd3"}, - {file = "pillow-10.3.0-cp312-cp312-win32.whl", hash = "sha256:51243f1ed5161b9945011a7360e997729776f6e5d7005ba0c6879267d4c5139d"}, - {file = "pillow-10.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:412444afb8c4c7a6cc11a47dade32982439925537e483be7c0ae0cf96c4f6a0b"}, - {file = "pillow-10.3.0-cp312-cp312-win_arm64.whl", hash = "sha256:798232c92e7665fe82ac085f9d8e8ca98826f8e27859d9a96b41d519ecd2e49a"}, - {file = "pillow-10.3.0-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:4eaa22f0d22b1a7e93ff0a596d57fdede2e550aecffb5a1ef1106aaece48e96b"}, - {file = 
"pillow-10.3.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:cd5e14fbf22a87321b24c88669aad3a51ec052eb145315b3da3b7e3cc105b9a2"}, - {file = "pillow-10.3.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1530e8f3a4b965eb6a7785cf17a426c779333eb62c9a7d1bbcf3ffd5bf77a4aa"}, - {file = "pillow-10.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d512aafa1d32efa014fa041d38868fda85028e3f930a96f85d49c7d8ddc0383"}, - {file = "pillow-10.3.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:339894035d0ede518b16073bdc2feef4c991ee991a29774b33e515f1d308e08d"}, - {file = "pillow-10.3.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:aa7e402ce11f0885305bfb6afb3434b3cd8f53b563ac065452d9d5654c7b86fd"}, - {file = "pillow-10.3.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:0ea2a783a2bdf2a561808fe4a7a12e9aa3799b701ba305de596bc48b8bdfce9d"}, - {file = "pillow-10.3.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:c78e1b00a87ce43bb37642c0812315b411e856a905d58d597750eb79802aaaa3"}, - {file = "pillow-10.3.0-cp38-cp38-win32.whl", hash = "sha256:72d622d262e463dfb7595202d229f5f3ab4b852289a1cd09650362db23b9eb0b"}, - {file = "pillow-10.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:2034f6759a722da3a3dbd91a81148cf884e91d1b747992ca288ab88c1de15999"}, - {file = "pillow-10.3.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:2ed854e716a89b1afcedea551cd85f2eb2a807613752ab997b9974aaa0d56936"}, - {file = "pillow-10.3.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:dc1a390a82755a8c26c9964d457d4c9cbec5405896cba94cf51f36ea0d855002"}, - {file = "pillow-10.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4203efca580f0dd6f882ca211f923168548f7ba334c189e9eab1178ab840bf60"}, - {file = "pillow-10.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3102045a10945173d38336f6e71a8dc71bcaeed55c3123ad4af82c52807b9375"}, - {file = 
"pillow-10.3.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:6fb1b30043271ec92dc65f6d9f0b7a830c210b8a96423074b15c7bc999975f57"}, - {file = "pillow-10.3.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:1dfc94946bc60ea375cc39cff0b8da6c7e5f8fcdc1d946beb8da5c216156ddd8"}, - {file = "pillow-10.3.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b09b86b27a064c9624d0a6c54da01c1beaf5b6cadfa609cf63789b1d08a797b9"}, - {file = "pillow-10.3.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d3b2348a78bc939b4fed6552abfd2e7988e0f81443ef3911a4b8498ca084f6eb"}, - {file = "pillow-10.3.0-cp39-cp39-win32.whl", hash = "sha256:45ebc7b45406febf07fef35d856f0293a92e7417ae7933207e90bf9090b70572"}, - {file = "pillow-10.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:0ba26351b137ca4e0db0342d5d00d2e355eb29372c05afd544ebf47c0956ffeb"}, - {file = "pillow-10.3.0-cp39-cp39-win_arm64.whl", hash = "sha256:50fd3f6b26e3441ae07b7c979309638b72abc1a25da31a81a7fbd9495713ef4f"}, - {file = "pillow-10.3.0-pp310-pypy310_pp73-macosx_10_10_x86_64.whl", hash = "sha256:6b02471b72526ab8a18c39cb7967b72d194ec53c1fd0a70b050565a0f366d355"}, - {file = "pillow-10.3.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:8ab74c06ffdab957d7670c2a5a6e1a70181cd10b727cd788c4dd9005b6a8acd9"}, - {file = "pillow-10.3.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:048eeade4c33fdf7e08da40ef402e748df113fd0b4584e32c4af74fe78baaeb2"}, - {file = "pillow-10.3.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e2ec1e921fd07c7cda7962bad283acc2f2a9ccc1b971ee4b216b75fad6f0463"}, - {file = "pillow-10.3.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:4c8e73e99da7db1b4cad7f8d682cf6abad7844da39834c288fbfa394a47bbced"}, - {file = "pillow-10.3.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:16563993329b79513f59142a6b02055e10514c1a8e86dca8b48a893e33cf91e3"}, - {file = 
"pillow-10.3.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:dd78700f5788ae180b5ee8902c6aea5a5726bac7c364b202b4b3e3ba2d293170"}, - {file = "pillow-10.3.0-pp39-pypy39_pp73-macosx_10_10_x86_64.whl", hash = "sha256:aff76a55a8aa8364d25400a210a65ff59d0168e0b4285ba6bf2bd83cf675ba32"}, - {file = "pillow-10.3.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:b7bc2176354defba3edc2b9a777744462da2f8e921fbaf61e52acb95bafa9828"}, - {file = "pillow-10.3.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:793b4e24db2e8742ca6423d3fde8396db336698c55cd34b660663ee9e45ed37f"}, - {file = "pillow-10.3.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d93480005693d247f8346bc8ee28c72a2191bdf1f6b5db469c096c0c867ac015"}, - {file = "pillow-10.3.0-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:c83341b89884e2b2e55886e8fbbf37c3fa5efd6c8907124aeb72f285ae5696e5"}, - {file = "pillow-10.3.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:1a1d1915db1a4fdb2754b9de292642a39a7fb28f1736699527bb649484fb966a"}, - {file = "pillow-10.3.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a0eaa93d054751ee9964afa21c06247779b90440ca41d184aeb5d410f20ff591"}, - {file = "pillow-10.3.0.tar.gz", hash = "sha256:9d2455fbf44c914840c793e89aa82d0e1763a14253a000743719ae5946814b2d"}, -] - -[package.extras] -docs = ["furo", "olefile", "sphinx (>=2.4)", "sphinx-copybutton", "sphinx-inline-tabs", "sphinx-removed-in", "sphinxext-opengraph"] -fpx = ["olefile"] -mic = ["olefile"] -tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"] -typing = ["typing-extensions"] -xmp = ["defusedxml"] + {file = "pip-requirements-parser-32.0.1.tar.gz", hash = "sha256:b4fa3a7a0be38243123cf9d1f3518da10c51bdb165a2b2985566247f9155a7d3"}, + {file = "pip_requirements_parser-32.0.1-py3-none-any.whl", hash = 
"sha256:4659bc2a667783e7a15d190f6fccf8b2486685b6dba4c19c3876314769c57526"}, +] + +[package.dependencies] +packaging = "*" +pyparsing = "*" + +[package.extras] +docs = ["Sphinx (>=3.3.1)", "doc8 (>=0.8.1)", "sphinx-rtd-theme (>=0.5.0)"] +testing = ["aboutcode-toolkit (>=6.0.0)", "black", "pytest (>=6,!=7.0.0)", "pytest-xdist (>=2)"] [[package]] name = "platformdirs" @@ -2475,6 +3521,17 @@ files = [ dev = ["pre-commit", "tox"] testing = ["pytest", "pytest-benchmark"] +[[package]] +name = "ply" +version = "3.11" +description = "Python Lex & Yacc" +optional = false +python-versions = "*" +files = [ + {file = "ply-3.11-py2.py3-none-any.whl", hash = "sha256:096f9b8350b65ebd2fd1346b12452efe5b9607f7482813ffca50c22722a807ce"}, + {file = "ply-3.11.tar.gz", hash = "sha256:00c7c1aaa88358b9c765b6d3000c6eec0ba42abca5351b095321aef446081da3"}, +] + [[package]] name = "pooch" version = "1.8.2" @@ -2514,15 +3571,29 @@ nodeenv = ">=0.11.1" pyyaml = ">=5.1" virtualenv = ">=20.10.0" +[[package]] +name = "prometheus-client" +version = "0.20.0" +description = "Python client for the Prometheus monitoring system." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "prometheus_client-0.20.0-py3-none-any.whl", hash = "sha256:cde524a85bce83ca359cc837f28b8c0db5cac7aa653a588fd7e84ba061c329e7"}, + {file = "prometheus_client-0.20.0.tar.gz", hash = "sha256:287629d00b147a32dcb2be0b9df905da599b2d82f80377083ec8463309a4bb89"}, +] + +[package.extras] +twisted = ["twisted"] + [[package]] name = "prompt-toolkit" -version = "3.0.46" +version = "3.0.47" description = "Library for building powerful interactive command lines in Python" optional = false python-versions = ">=3.7.0" files = [ - {file = "prompt_toolkit-3.0.46-py3-none-any.whl", hash = "sha256:45abe60a8300f3c618b23c16c4bb98c6fc80af8ce8b17c7ae92db48db3ee63c1"}, - {file = "prompt_toolkit-3.0.46.tar.gz", hash = "sha256:869c50d682152336e23c4db7f74667639b5047494202ffe7670817053fd57795"}, + {file = "prompt_toolkit-3.0.47-py3-none-any.whl", hash = "sha256:0d7bfa67001d5e39d02c224b663abc33687405033a8c422d0d675a5a13361d10"}, + {file = "prompt_toolkit-3.0.47.tar.gz", hash = "sha256:1e1b29cb58080b1e69f207c893a1a7bf16d127a5c30c9d17a25a5d77792e5360"}, ] [package.dependencies] @@ -2530,27 +3601,28 @@ wcwidth = "*" [[package]] name = "psutil" -version = "5.9.8" +version = "6.0.0" description = "Cross-platform lib for process and system monitoring in Python." 
optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" -files = [ - {file = "psutil-5.9.8-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:26bd09967ae00920df88e0352a91cff1a78f8d69b3ecabbfe733610c0af486c8"}, - {file = "psutil-5.9.8-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:05806de88103b25903dff19bb6692bd2e714ccf9e668d050d144012055cbca73"}, - {file = "psutil-5.9.8-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:611052c4bc70432ec770d5d54f64206aa7203a101ec273a0cd82418c86503bb7"}, - {file = "psutil-5.9.8-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:50187900d73c1381ba1454cf40308c2bf6f34268518b3f36a9b663ca87e65e36"}, - {file = "psutil-5.9.8-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:02615ed8c5ea222323408ceba16c60e99c3f91639b07da6373fb7e6539abc56d"}, - {file = "psutil-5.9.8-cp27-none-win32.whl", hash = "sha256:36f435891adb138ed3c9e58c6af3e2e6ca9ac2f365efe1f9cfef2794e6c93b4e"}, - {file = "psutil-5.9.8-cp27-none-win_amd64.whl", hash = "sha256:bd1184ceb3f87651a67b2708d4c3338e9b10c5df903f2e3776b62303b26cb631"}, - {file = "psutil-5.9.8-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:aee678c8720623dc456fa20659af736241f575d79429a0e5e9cf88ae0605cc81"}, - {file = "psutil-5.9.8-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8cb6403ce6d8e047495a701dc7c5bd788add903f8986d523e3e20b98b733e421"}, - {file = "psutil-5.9.8-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d06016f7f8625a1825ba3732081d77c94589dca78b7a3fc072194851e88461a4"}, - {file = "psutil-5.9.8-cp36-cp36m-win32.whl", hash = "sha256:7d79560ad97af658a0f6adfef8b834b53f64746d45b403f225b85c5c2c140eee"}, - {file = "psutil-5.9.8-cp36-cp36m-win_amd64.whl", hash = "sha256:27cc40c3493bb10de1be4b3f07cae4c010ce715290a5be22b98493509c6299e2"}, - {file = "psutil-5.9.8-cp37-abi3-win32.whl", hash = 
"sha256:bc56c2a1b0d15aa3eaa5a60c9f3f8e3e565303b465dbf57a1b730e7a2b9844e0"}, - {file = "psutil-5.9.8-cp37-abi3-win_amd64.whl", hash = "sha256:8db4c1b57507eef143a15a6884ca10f7c73876cdf5d51e713151c1236a0e68cf"}, - {file = "psutil-5.9.8-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:d16bbddf0693323b8c6123dd804100241da461e41d6e332fb0ba6058f630f8c8"}, - {file = "psutil-5.9.8.tar.gz", hash = "sha256:6be126e3225486dff286a8fb9a06246a5253f4c7c53b475ea5f5ac934e64194c"}, +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" +files = [ + {file = "psutil-6.0.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:a021da3e881cd935e64a3d0a20983bda0bb4cf80e4f74fa9bfcb1bc5785360c6"}, + {file = "psutil-6.0.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:1287c2b95f1c0a364d23bc6f2ea2365a8d4d9b726a3be7294296ff7ba97c17f0"}, + {file = "psutil-6.0.0-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:a9a3dbfb4de4f18174528d87cc352d1f788b7496991cca33c6996f40c9e3c92c"}, + {file = "psutil-6.0.0-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:6ec7588fb3ddaec7344a825afe298db83fe01bfaaab39155fa84cf1c0d6b13c3"}, + {file = "psutil-6.0.0-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:1e7c870afcb7d91fdea2b37c24aeb08f98b6d67257a5cb0a8bc3ac68d0f1a68c"}, + {file = "psutil-6.0.0-cp27-none-win32.whl", hash = "sha256:02b69001f44cc73c1c5279d02b30a817e339ceb258ad75997325e0e6169d8b35"}, + {file = "psutil-6.0.0-cp27-none-win_amd64.whl", hash = "sha256:21f1fb635deccd510f69f485b87433460a603919b45e2a324ad65b0cc74f8fb1"}, + {file = "psutil-6.0.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:c588a7e9b1173b6e866756dde596fd4cad94f9399daf99ad8c3258b3cb2b47a0"}, + {file = "psutil-6.0.0-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ed2440ada7ef7d0d608f20ad89a04ec47d2d3ab7190896cd62ca5fc4fe08bf0"}, + {file = 
"psutil-6.0.0-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5fd9a97c8e94059b0ef54a7d4baf13b405011176c3b6ff257c247cae0d560ecd"}, + {file = "psutil-6.0.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e2e8d0054fc88153ca0544f5c4d554d42e33df2e009c4ff42284ac9ebdef4132"}, + {file = "psutil-6.0.0-cp36-cp36m-win32.whl", hash = "sha256:fc8c9510cde0146432bbdb433322861ee8c3efbf8589865c8bf8d21cb30c4d14"}, + {file = "psutil-6.0.0-cp36-cp36m-win_amd64.whl", hash = "sha256:34859b8d8f423b86e4385ff3665d3f4d94be3cdf48221fbe476e883514fdb71c"}, + {file = "psutil-6.0.0-cp37-abi3-win32.whl", hash = "sha256:a495580d6bae27291324fe60cea0b5a7c23fa36a7cd35035a16d93bdcf076b9d"}, + {file = "psutil-6.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:33ea5e1c975250a720b3a6609c490db40dae5d83a4eb315170c4fe0d8b1f34b3"}, + {file = "psutil-6.0.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:ffe7fc9b6b36beadc8c322f84e1caff51e8703b88eee1da46d1e3a6ae11b4fd0"}, + {file = "psutil-6.0.0.tar.gz", hash = "sha256:8faae4f310b6d969fa26ca0545338b21f73c6b15db7c4a8d934a5482faa818f2"}, ] [package.extras] @@ -2567,6 +3639,20 @@ files = [ {file = "ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220"}, ] +[[package]] +name = "py-partiql-parser" +version = "0.5.5" +description = "Pure Python PartiQL Parser" +optional = false +python-versions = "*" +files = [ + {file = "py_partiql_parser-0.5.5-py2.py3-none-any.whl", hash = "sha256:90d278818385bd60c602410c953ee78f04ece599d8cd21c656fc5e47399577a1"}, + {file = "py_partiql_parser-0.5.5.tar.gz", hash = "sha256:ed07f8edf4b55e295cab4f5fd3e2ba3196cee48a43fe210d53ddd6ffce1cf1ff"}, +] + +[package.extras] +dev = ["black (==22.6.0)", "flake8", "mypy", "pytest"] + [[package]] name = "pyarrow" version = "16.0.0" @@ -2626,6 +3712,136 @@ files = [ {file = "pyarrow_hotfix-0.6.tar.gz", hash = 
"sha256:79d3e030f7ff890d408a100ac16d6f00b14d44a502d7897cd9fc3e3a534e9945"}, ] +[[package]] +name = "pycparser" +version = "2.22" +description = "C parser in Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"}, + {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"}, +] + +[[package]] +name = "pydantic" +version = "2.8.0" +description = "Data validation using Python type hints" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pydantic-2.8.0-py3-none-any.whl", hash = "sha256:ead4f3a1e92386a734ca1411cb25d94147cf8778ed5be6b56749047676d6364e"}, + {file = "pydantic-2.8.0.tar.gz", hash = "sha256:d970ffb9d030b710795878940bd0489842c638e7252fc4a19c3ae2f7da4d6141"}, +] + +[package.dependencies] +annotated-types = ">=0.4.0" +pydantic-core = "2.20.0" +typing-extensions = {version = ">=4.6.1", markers = "python_version < \"3.13\""} + +[package.extras] +email = ["email-validator (>=2.0.0)"] + +[[package]] +name = "pydantic-core" +version = "2.20.0" +description = "Core functionality for Pydantic validation and serialization" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pydantic_core-2.20.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:e9dcd7fb34f7bfb239b5fa420033642fff0ad676b765559c3737b91f664d4fa9"}, + {file = "pydantic_core-2.20.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:649a764d9b0da29816889424697b2a3746963ad36d3e0968784ceed6e40c6355"}, + {file = "pydantic_core-2.20.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7701df088d0b05f3460f7ba15aec81ac8b0fb5690367dfd072a6c38cf5b7fdb5"}, + {file = "pydantic_core-2.20.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ab760f17c3e792225cdaef31ca23c0aea45c14ce80d8eff62503f86a5ab76bff"}, + {file = 
"pydantic_core-2.20.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cb1ad5b4d73cde784cf64580166568074f5ccd2548d765e690546cff3d80937d"}, + {file = "pydantic_core-2.20.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b81ec2efc04fc1dbf400647d4357d64fb25543bae38d2d19787d69360aad21c9"}, + {file = "pydantic_core-2.20.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c4a9732a5cad764ba37f3aa873dccb41b584f69c347a57323eda0930deec8e10"}, + {file = "pydantic_core-2.20.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6dc85b9e10cc21d9c1055f15684f76fa4facadddcb6cd63abab702eb93c98943"}, + {file = "pydantic_core-2.20.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:21d9f7e24f63fdc7118e6cc49defaab8c1d27570782f7e5256169d77498cf7c7"}, + {file = "pydantic_core-2.20.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8b315685832ab9287e6124b5d74fc12dda31e6421d7f6b08525791452844bc2d"}, + {file = "pydantic_core-2.20.0-cp310-none-win32.whl", hash = "sha256:c3dc8ec8b87c7ad534c75b8855168a08a7036fdb9deeeed5705ba9410721c84d"}, + {file = "pydantic_core-2.20.0-cp310-none-win_amd64.whl", hash = "sha256:85770b4b37bb36ef93a6122601795231225641003e0318d23c6233c59b424279"}, + {file = "pydantic_core-2.20.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:58e251bb5a5998f7226dc90b0b753eeffa720bd66664eba51927c2a7a2d5f32c"}, + {file = "pydantic_core-2.20.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:78d584caac52c24240ef9ecd75de64c760bbd0e20dbf6973631815e3ef16ef8b"}, + {file = "pydantic_core-2.20.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5084ec9721f82bef5ff7c4d1ee65e1626783abb585f8c0993833490b63fe1792"}, + {file = "pydantic_core-2.20.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6d0f52684868db7c218437d260e14d37948b094493f2646f22d3dda7229bbe3f"}, + {file = 
"pydantic_core-2.20.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1def125d59a87fe451212a72ab9ed34c118ff771e5473fef4f2f95d8ede26d75"}, + {file = "pydantic_core-2.20.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b34480fd6778ab356abf1e9086a4ced95002a1e195e8d2fd182b0def9d944d11"}, + {file = "pydantic_core-2.20.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d42669d319db366cb567c3b444f43caa7ffb779bf9530692c6f244fc635a41eb"}, + {file = "pydantic_core-2.20.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:53b06aea7a48919a254b32107647be9128c066aaa6ee6d5d08222325f25ef175"}, + {file = "pydantic_core-2.20.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:1f038156b696a1c39d763b2080aeefa87ddb4162c10aa9fabfefffc3dd8180fa"}, + {file = "pydantic_core-2.20.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3f0f3a4a23717280a5ee3ac4fb1f81d6fde604c9ec5100f7f6f987716bb8c137"}, + {file = "pydantic_core-2.20.0-cp311-none-win32.whl", hash = "sha256:316fe7c3fec017affd916a0c83d6f1ec697cbbbdf1124769fa73328e7907cc2e"}, + {file = "pydantic_core-2.20.0-cp311-none-win_amd64.whl", hash = "sha256:2d06a7fa437f93782e3f32d739c3ec189f82fca74336c08255f9e20cea1ed378"}, + {file = "pydantic_core-2.20.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:d6f8c49657f3eb7720ed4c9b26624063da14937fc94d1812f1e04a2204db3e17"}, + {file = "pydantic_core-2.20.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ad1bd2f377f56fec11d5cfd0977c30061cd19f4fa199bf138b200ec0d5e27eeb"}, + {file = "pydantic_core-2.20.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ed741183719a5271f97d93bbcc45ed64619fa38068aaa6e90027d1d17e30dc8d"}, + {file = "pydantic_core-2.20.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d82e5ed3a05f2dcb89c6ead2fd0dbff7ac09bc02c1b4028ece2d3a3854d049ce"}, + {file = 
"pydantic_core-2.20.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b2ba34a099576234671f2e4274e5bc6813b22e28778c216d680eabd0db3f7dad"}, + {file = "pydantic_core-2.20.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:879ae6bb08a063b3e1b7ac8c860096d8fd6b48dd9b2690b7f2738b8c835e744b"}, + {file = "pydantic_core-2.20.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b0eefc7633a04c0694340aad91fbfd1986fe1a1e0c63a22793ba40a18fcbdc8"}, + {file = "pydantic_core-2.20.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:73deadd6fd8a23e2f40b412b3ac617a112143c8989a4fe265050fd91ba5c0608"}, + {file = "pydantic_core-2.20.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:35681445dc85446fb105943d81ae7569aa7e89de80d1ca4ac3229e05c311bdb1"}, + {file = "pydantic_core-2.20.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:0f6dd3612a3b9f91f2e63924ea18a4476656c6d01843ca20a4c09e00422195af"}, + {file = "pydantic_core-2.20.0-cp312-none-win32.whl", hash = "sha256:7e37b6bb6e90c2b8412b06373c6978d9d81e7199a40e24a6ef480e8acdeaf918"}, + {file = "pydantic_core-2.20.0-cp312-none-win_amd64.whl", hash = "sha256:7d4df13d1c55e84351fab51383520b84f490740a9f1fec905362aa64590b7a5d"}, + {file = "pydantic_core-2.20.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:d43e7ab3b65e4dc35a7612cfff7b0fd62dce5bc11a7cd198310b57f39847fd6c"}, + {file = "pydantic_core-2.20.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7b6a24d7b5893392f2b8e3b7a0031ae3b14c6c1942a4615f0d8794fdeeefb08b"}, + {file = "pydantic_core-2.20.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b2f13c3e955a087c3ec86f97661d9f72a76e221281b2262956af381224cfc243"}, + {file = "pydantic_core-2.20.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:72432fd6e868c8d0a6849869e004b8bcae233a3c56383954c228316694920b38"}, + {file = 
"pydantic_core-2.20.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d70a8ff2d4953afb4cbe6211f17268ad29c0b47e73d3372f40e7775904bc28fc"}, + {file = "pydantic_core-2.20.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8e49524917b8d3c2f42cd0d2df61178e08e50f5f029f9af1f402b3ee64574392"}, + {file = "pydantic_core-2.20.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a4f0f71653b1c1bad0350bc0b4cc057ab87b438ff18fa6392533811ebd01439c"}, + {file = "pydantic_core-2.20.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:16197e6f4fdecb9892ed2436e507e44f0a1aa2cff3b9306d1c879ea2f9200997"}, + {file = "pydantic_core-2.20.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:763602504bf640b3ded3bba3f8ed8a1cc2fc6a87b8d55c1c5689f428c49c947e"}, + {file = "pydantic_core-2.20.0-cp313-none-win32.whl", hash = "sha256:a3f243f318bd9523277fa123b3163f4c005a3e8619d4b867064de02f287a564d"}, + {file = "pydantic_core-2.20.0-cp313-none-win_amd64.whl", hash = "sha256:03aceaf6a5adaad3bec2233edc5a7905026553916615888e53154807e404545c"}, + {file = "pydantic_core-2.20.0-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:d6f2d8b8da1f03f577243b07bbdd3412eee3d37d1f2fd71d1513cbc76a8c1239"}, + {file = "pydantic_core-2.20.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a272785a226869416c6b3c1b7e450506152d3844207331f02f27173562c917e0"}, + {file = "pydantic_core-2.20.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:efbb412d55a4ffe73963fed95c09ccb83647ec63b711c4b3752be10a56f0090b"}, + {file = "pydantic_core-2.20.0-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1e4f46189d8740561b43655263a41aac75ff0388febcb2c9ec4f1b60a0ec12f3"}, + {file = "pydantic_core-2.20.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:87d3df115f4a3c8c5e4d5acf067d399c6466d7e604fc9ee9acbe6f0c88a0c3cf"}, + {file = 
"pydantic_core-2.20.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a340d2bdebe819d08f605e9705ed551c3feb97e4fd71822d7147c1e4bdbb9508"}, + {file = "pydantic_core-2.20.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:616b9c2f882393d422ba11b40e72382fe975e806ad693095e9a3b67c59ea6150"}, + {file = "pydantic_core-2.20.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:25c46bb2ff6084859bbcfdf4f1a63004b98e88b6d04053e8bf324e115398e9e7"}, + {file = "pydantic_core-2.20.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:23425eccef8f2c342f78d3a238c824623836c6c874d93c726673dbf7e56c78c0"}, + {file = "pydantic_core-2.20.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:52527e8f223ba29608d999d65b204676398009725007c9336651c2ec2d93cffc"}, + {file = "pydantic_core-2.20.0-cp38-none-win32.whl", hash = "sha256:1c3c5b7f70dd19a6845292b0775295ea81c61540f68671ae06bfe4421b3222c2"}, + {file = "pydantic_core-2.20.0-cp38-none-win_amd64.whl", hash = "sha256:8093473d7b9e908af1cef30025609afc8f5fd2a16ff07f97440fd911421e4432"}, + {file = "pydantic_core-2.20.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:ee7785938e407418795e4399b2bf5b5f3cf6cf728077a7f26973220d58d885cf"}, + {file = "pydantic_core-2.20.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0e75794883d635071cf6b4ed2a5d7a1e50672ab7a051454c76446ef1ebcdcc91"}, + {file = "pydantic_core-2.20.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:344e352c96e53b4f56b53d24728217c69399b8129c16789f70236083c6ceb2ac"}, + {file = "pydantic_core-2.20.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:978d4123ad1e605daf1ba5e01d4f235bcf7b6e340ef07e7122e8e9cfe3eb61ab"}, + {file = "pydantic_core-2.20.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3c05eaf6c863781eb834ab41f5963604ab92855822a2062897958089d1335dad"}, + {file = "pydantic_core-2.20.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", 
hash = "sha256:bc7e43b4a528ffca8c9151b6a2ca34482c2fdc05e6aa24a84b7f475c896fc51d"}, + {file = "pydantic_core-2.20.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:658287a29351166510ebbe0a75c373600cc4367a3d9337b964dada8d38bcc0f4"}, + {file = "pydantic_core-2.20.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1dacf660d6de692fe351e8c806e7efccf09ee5184865893afbe8e59be4920b4a"}, + {file = "pydantic_core-2.20.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:3e147fc6e27b9a487320d78515c5f29798b539179f7777018cedf51b7749e4f4"}, + {file = "pydantic_core-2.20.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:c867230d715a3dd1d962c8d9bef0d3168994ed663e21bf748b6e3a529a129aab"}, + {file = "pydantic_core-2.20.0-cp39-none-win32.whl", hash = "sha256:22b813baf0dbf612752d8143a2dbf8e33ccb850656b7850e009bad2e101fc377"}, + {file = "pydantic_core-2.20.0-cp39-none-win_amd64.whl", hash = "sha256:3a7235b46c1bbe201f09b6f0f5e6c36b16bad3d0532a10493742f91fbdc8035f"}, + {file = "pydantic_core-2.20.0-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:cafde15a6f7feaec2f570646e2ffc5b73412295d29134a29067e70740ec6ee20"}, + {file = "pydantic_core-2.20.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:2aec8eeea0b08fd6bc2213d8e86811a07491849fd3d79955b62d83e32fa2ad5f"}, + {file = "pydantic_core-2.20.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:840200827984f1c4e114008abc2f5ede362d6e11ed0b5931681884dd41852ff1"}, + {file = "pydantic_core-2.20.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8ea1d8b7df522e5ced34993c423c3bf3735c53df8b2a15688a2f03a7d678800"}, + {file = "pydantic_core-2.20.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d5b8376a867047bf08910573deb95d3c8dfb976eb014ee24f3b5a61ccc5bee1b"}, + {file = "pydantic_core-2.20.0-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = 
"sha256:d08264b4460326cefacc179fc1411304d5af388a79910832835e6f641512358b"}, + {file = "pydantic_core-2.20.0-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:7a3639011c2e8a9628466f616ed7fb413f30032b891898e10895a0a8b5857d6c"}, + {file = "pydantic_core-2.20.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:05e83ce2f7eba29e627dd8066aa6c4c0269b2d4f889c0eba157233a353053cea"}, + {file = "pydantic_core-2.20.0-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:603a843fea76a595c8f661cd4da4d2281dff1e38c4a836a928eac1a2f8fe88e4"}, + {file = "pydantic_core-2.20.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:ac76f30d5d3454f4c28826d891fe74d25121a346c69523c9810ebba43f3b1cec"}, + {file = "pydantic_core-2.20.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22e3b1d4b1b3f6082849f9b28427ef147a5b46a6132a3dbaf9ca1baa40c88609"}, + {file = "pydantic_core-2.20.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2761f71faed820e25ec62eacba670d1b5c2709bb131a19fcdbfbb09884593e5a"}, + {file = "pydantic_core-2.20.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a0586cddbf4380e24569b8a05f234e7305717cc8323f50114dfb2051fcbce2a3"}, + {file = "pydantic_core-2.20.0-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:b8c46a8cf53e849eea7090f331ae2202cd0f1ceb090b00f5902c423bd1e11805"}, + {file = "pydantic_core-2.20.0-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:b4a085bd04af7245e140d1b95619fe8abb445a3d7fdf219b3f80c940853268ef"}, + {file = "pydantic_core-2.20.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:116b326ac82c8b315e7348390f6d30bcfe6e688a7d3f1de50ff7bcc2042a23c2"}, + {file = "pydantic_core-2.20.0.tar.gz", hash = "sha256:366be8e64e0cb63d87cf79b4e1765c0703dd6313c729b22e7b9e378db6b96877"}, +] + +[package.dependencies] +typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" + [[package]] name = "pygments" version = "2.18.0" @@ -2640,6 +3856,78 @@ files = [ 
[package.extras] windows-terminal = ["colorama (>=0.4.6)"] +[[package]] +name = "pynacl" +version = "1.5.0" +description = "Python binding to the Networking and Cryptography (NaCl) library" +optional = false +python-versions = ">=3.6" +files = [ + {file = "PyNaCl-1.5.0-cp36-abi3-macosx_10_10_universal2.whl", hash = "sha256:401002a4aaa07c9414132aaed7f6836ff98f59277a234704ff66878c2ee4a0d1"}, + {file = "PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:52cb72a79269189d4e0dc537556f4740f7f0a9ec41c1322598799b0bdad4ef92"}, + {file = "PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a36d4a9dda1f19ce6e03c9a784a2921a4b726b02e1c736600ca9c22029474394"}, + {file = "PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:0c84947a22519e013607c9be43706dd42513f9e6ae5d39d3613ca1e142fba44d"}, + {file = "PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:06b8f6fa7f5de8d5d2f7573fe8c863c051225a27b61e6860fd047b1775807858"}, + {file = "PyNaCl-1.5.0-cp36-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:a422368fc821589c228f4c49438a368831cb5bbc0eab5ebe1d7fac9dded6567b"}, + {file = "PyNaCl-1.5.0-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:61f642bf2378713e2c2e1de73444a3778e5f0a38be6fee0fe532fe30060282ff"}, + {file = "PyNaCl-1.5.0-cp36-abi3-win32.whl", hash = "sha256:e46dae94e34b085175f8abb3b0aaa7da40767865ac82c928eeb9e57e1ea8a543"}, + {file = "PyNaCl-1.5.0-cp36-abi3-win_amd64.whl", hash = "sha256:20f42270d27e1b6a29f54032090b972d97f0a1b0948cc52392041ef7831fee93"}, + {file = "PyNaCl-1.5.0.tar.gz", hash = "sha256:8ac7448f09ab85811607bdd21ec2464495ac8b7c66d146bf545b0f08fb9220ba"}, +] + +[package.dependencies] +cffi = ">=1.4.1" + +[package.extras] +docs = ["sphinx (>=1.6.5)", "sphinx-rtd-theme"] +tests = ["hypothesis (>=3.27.0)", "pytest (>=3.2.1,!=3.3.0)"] + +[[package]] +name = "pyogrio" +version = 
"0.9.0" +description = "Vectorized spatial vector file format I/O using GDAL/OGR" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pyogrio-0.9.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:1a495ca4fb77c69595747dd688f8f17bb7d2ea9cd86603aa71c7fc98cc8b4174"}, + {file = "pyogrio-0.9.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:6dc94a67163218581c7df275223488ac9b31dc582ccd756da607c3338908566c"}, + {file = "pyogrio-0.9.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e38c3c6d37cf2cc969407e4d051dcb507cfd948eb26c7b0840c4f7d7d4a71bd4"}, + {file = "pyogrio-0.9.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:f47c9b6818cc0f420015b672d5dcc488530a5ee63e5ba35a184957b21ea3922a"}, + {file = "pyogrio-0.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:fb04bd80964428491951766452f0071b0bc37c7d38c45ef02502dbd83e5d74a0"}, + {file = "pyogrio-0.9.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:f5d80eb846be4fc4e642cbedc1ed0c143e8d241653382ecc76a7620bbd2a5c3a"}, + {file = "pyogrio-0.9.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:2f2ec57ab74785db9c2bf47c0a6731e5175595a13f8253f06fa84136adb310a9"}, + {file = "pyogrio-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a289584da6df7ca318947301fe0ba9177e7f863f63110e087c80ac5f3658de8"}, + {file = "pyogrio-0.9.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:13642608a1cd67797ae8b5d792b0518d8ef3eb76506c8232ab5eaa1ea1159dff"}, + {file = "pyogrio-0.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:9440466c0211ac81f3417f274da5903f15546b486f76b2f290e74a56aaf0e737"}, + {file = "pyogrio-0.9.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:2e98913fa183f7597c609e774820a149e9329fd2a0f8d33978252fbd00ae87e6"}, + {file = "pyogrio-0.9.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:f8bf193269ea9d347ac3ddada960a59f1ab2e4a5c009be95dc70e6505346b2fc"}, + {file = "pyogrio-0.9.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", 
hash = "sha256:3f964002d445521ad5b8e732a6b5ef0e2d2be7fe566768e5075c1d71398da64a"}, + {file = "pyogrio-0.9.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:083351b258b3e08b6c6085dac560bd321b68de5cb4a66229095da68d5f3d696b"}, + {file = "pyogrio-0.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:796e4f6a4e769b2eb6fea9a10546ea4bdee16182d1e29802b4d6349363c3c1d7"}, + {file = "pyogrio-0.9.0-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:7fcafed24371fe6e23bcf5abebbb29269f8d79915f1dd818ac85453657ea714a"}, + {file = "pyogrio-0.9.0-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:30cbeeaedb9bced7012487e7438919aa0c7dfba18ac3d4315182b46eb3139b9d"}, + {file = "pyogrio-0.9.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4da0b9deb380bd9a200fee13182c4f95b02b4c554c923e2e0032f32aaf1439ed"}, + {file = "pyogrio-0.9.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:4e0f90a6c3771ee1f1fea857778b4b6a1b64000d851b819f435f9091b3c38c60"}, + {file = "pyogrio-0.9.0-cp38-cp38-win_amd64.whl", hash = "sha256:959022f3ad04053f8072dc9a2ad110c46edd9e4f92352061ba835fc91df3ca96"}, + {file = "pyogrio-0.9.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:2829615cf58b1b24a9f96fea42abedaa1a800dd351c67374cc2f6341138608f3"}, + {file = "pyogrio-0.9.0-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:17420febc17651876d5140b54b24749aa751d482b5f9ef6267b8053e6e962876"}, + {file = "pyogrio-0.9.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a2fcaa269031dbbc8ebd91243c6452c5d267d6df939c008ab7533413c9cf92d"}, + {file = "pyogrio-0.9.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:019731a856a9abfe909e86f50eb13f8362f6742337caf757c54b7c8acfe75b89"}, + {file = "pyogrio-0.9.0-cp39-cp39-win_amd64.whl", hash = "sha256:d668cb10f2bf6ccd7c402f91e8b06290722dd09dbe265ae95b2c13db29ebeba0"}, + {file = "pyogrio-0.9.0.tar.gz", hash = "sha256:6a6fa2e8cf95b3d4a7c0fac48bce6e5037579e28d3eb33b53349d6e11f15e5a8"}, +] + +[package.dependencies] +certifi = "*" 
+numpy = "*" +packaging = "*" + +[package.extras] +benchmark = ["pytest-benchmark"] +dev = ["Cython"] +geopandas = ["geopandas"] +test = ["pytest", "pytest-cov"] + [[package]] name = "pyparsing" version = "3.1.2" @@ -2810,6 +4098,29 @@ files = [ {file = "pytz-2024.1.tar.gz", hash = "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812"}, ] +[[package]] +name = "pywin32" +version = "306" +description = "Python for Window Extensions" +optional = false +python-versions = "*" +files = [ + {file = "pywin32-306-cp310-cp310-win32.whl", hash = "sha256:06d3420a5155ba65f0b72f2699b5bacf3109f36acbe8923765c22938a69dfc8d"}, + {file = "pywin32-306-cp310-cp310-win_amd64.whl", hash = "sha256:84f4471dbca1887ea3803d8848a1616429ac94a4a8d05f4bc9c5dcfd42ca99c8"}, + {file = "pywin32-306-cp311-cp311-win32.whl", hash = "sha256:e65028133d15b64d2ed8f06dd9fbc268352478d4f9289e69c190ecd6818b6407"}, + {file = "pywin32-306-cp311-cp311-win_amd64.whl", hash = "sha256:a7639f51c184c0272e93f244eb24dafca9b1855707d94c192d4a0b4c01e1100e"}, + {file = "pywin32-306-cp311-cp311-win_arm64.whl", hash = "sha256:70dba0c913d19f942a2db25217d9a1b726c278f483a919f1abfed79c9cf64d3a"}, + {file = "pywin32-306-cp312-cp312-win32.whl", hash = "sha256:383229d515657f4e3ed1343da8be101000562bf514591ff383ae940cad65458b"}, + {file = "pywin32-306-cp312-cp312-win_amd64.whl", hash = "sha256:37257794c1ad39ee9be652da0462dc2e394c8159dfd913a8a4e8eb6fd346da0e"}, + {file = "pywin32-306-cp312-cp312-win_arm64.whl", hash = "sha256:5821ec52f6d321aa59e2db7e0a35b997de60c201943557d108af9d4ae1ec7040"}, + {file = "pywin32-306-cp37-cp37m-win32.whl", hash = "sha256:1c73ea9a0d2283d889001998059f5eaaba3b6238f767c9cf2833b13e6a685f65"}, + {file = "pywin32-306-cp37-cp37m-win_amd64.whl", hash = "sha256:72c5f621542d7bdd4fdb716227be0dd3f8565c11b280be6315b06ace35487d36"}, + {file = "pywin32-306-cp38-cp38-win32.whl", hash = "sha256:e4c092e2589b5cf0d365849e73e02c391c1349958c5ac3e9d5ccb9a28e017b3a"}, + {file = 
"pywin32-306-cp38-cp38-win_amd64.whl", hash = "sha256:e8ac1ae3601bee6ca9f7cb4b5363bf1c0badb935ef243c4733ff9a393b1690c0"}, + {file = "pywin32-306-cp39-cp39-win32.whl", hash = "sha256:e25fd5b485b55ac9c057f67d94bc203f3f6595078d1fb3b458c9c28b7153a802"}, + {file = "pywin32-306-cp39-cp39-win_amd64.whl", hash = "sha256:39b61c15272833b5c329a2989999dcae836b1eed650252ab1b7bfbe1d59f30f4"}, +] + [[package]] name = "pyyaml" version = "6.0.1" @@ -2907,6 +4218,94 @@ files = [ attrs = ">=22.2.0" rpds-py = ">=0.7.0" +[[package]] +name = "regex" +version = "2024.5.15" +description = "Alternative regular expression module, to replace re." +optional = false +python-versions = ">=3.8" +files = [ + {file = "regex-2024.5.15-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a81e3cfbae20378d75185171587cbf756015ccb14840702944f014e0d93ea09f"}, + {file = "regex-2024.5.15-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7b59138b219ffa8979013be7bc85bb60c6f7b7575df3d56dc1e403a438c7a3f6"}, + {file = "regex-2024.5.15-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a0bd000c6e266927cb7a1bc39d55be95c4b4f65c5be53e659537537e019232b1"}, + {file = "regex-2024.5.15-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5eaa7ddaf517aa095fa8da0b5015c44d03da83f5bd49c87961e3c997daed0de7"}, + {file = "regex-2024.5.15-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ba68168daedb2c0bab7fd7e00ced5ba90aebf91024dea3c88ad5063c2a562cca"}, + {file = "regex-2024.5.15-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6e8d717bca3a6e2064fc3a08df5cbe366369f4b052dcd21b7416e6d71620dca1"}, + {file = "regex-2024.5.15-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1337b7dbef9b2f71121cdbf1e97e40de33ff114801263b275aafd75303bd62b5"}, + {file = "regex-2024.5.15-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:f9ebd0a36102fcad2f03696e8af4ae682793a5d30b46c647eaf280d6cfb32796"}, + {file = "regex-2024.5.15-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:9efa1a32ad3a3ea112224897cdaeb6aa00381627f567179c0314f7b65d354c62"}, + {file = "regex-2024.5.15-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:1595f2d10dff3d805e054ebdc41c124753631b6a471b976963c7b28543cf13b0"}, + {file = "regex-2024.5.15-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:b802512f3e1f480f41ab5f2cfc0e2f761f08a1f41092d6718868082fc0d27143"}, + {file = "regex-2024.5.15-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:a0981022dccabca811e8171f913de05720590c915b033b7e601f35ce4ea7019f"}, + {file = "regex-2024.5.15-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:19068a6a79cf99a19ccefa44610491e9ca02c2be3305c7760d3831d38a467a6f"}, + {file = "regex-2024.5.15-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:1b5269484f6126eee5e687785e83c6b60aad7663dafe842b34691157e5083e53"}, + {file = "regex-2024.5.15-cp310-cp310-win32.whl", hash = "sha256:ada150c5adfa8fbcbf321c30c751dc67d2f12f15bd183ffe4ec7cde351d945b3"}, + {file = "regex-2024.5.15-cp310-cp310-win_amd64.whl", hash = "sha256:ac394ff680fc46b97487941f5e6ae49a9f30ea41c6c6804832063f14b2a5a145"}, + {file = "regex-2024.5.15-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f5b1dff3ad008dccf18e652283f5e5339d70bf8ba7c98bf848ac33db10f7bc7a"}, + {file = "regex-2024.5.15-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c6a2b494a76983df8e3d3feea9b9ffdd558b247e60b92f877f93a1ff43d26656"}, + {file = "regex-2024.5.15-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a32b96f15c8ab2e7d27655969a23895eb799de3665fa94349f3b2fbfd547236f"}, + {file = "regex-2024.5.15-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:10002e86e6068d9e1c91eae8295ef690f02f913c57db120b58fdd35a6bb1af35"}, + {file = 
"regex-2024.5.15-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ec54d5afa89c19c6dd8541a133be51ee1017a38b412b1321ccb8d6ddbeb4cf7d"}, + {file = "regex-2024.5.15-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:10e4ce0dca9ae7a66e6089bb29355d4432caed736acae36fef0fdd7879f0b0cb"}, + {file = "regex-2024.5.15-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3e507ff1e74373c4d3038195fdd2af30d297b4f0950eeda6f515ae3d84a1770f"}, + {file = "regex-2024.5.15-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d1f059a4d795e646e1c37665b9d06062c62d0e8cc3c511fe01315973a6542e40"}, + {file = "regex-2024.5.15-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0721931ad5fe0dda45d07f9820b90b2148ccdd8e45bb9e9b42a146cb4f695649"}, + {file = "regex-2024.5.15-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:833616ddc75ad595dee848ad984d067f2f31be645d603e4d158bba656bbf516c"}, + {file = "regex-2024.5.15-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:287eb7f54fc81546346207c533ad3c2c51a8d61075127d7f6d79aaf96cdee890"}, + {file = "regex-2024.5.15-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:19dfb1c504781a136a80ecd1fff9f16dddf5bb43cec6871778c8a907a085bb3d"}, + {file = "regex-2024.5.15-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:119af6e56dce35e8dfb5222573b50c89e5508d94d55713c75126b753f834de68"}, + {file = "regex-2024.5.15-cp311-cp311-win32.whl", hash = "sha256:1c1c174d6ec38d6c8a7504087358ce9213d4332f6293a94fbf5249992ba54efa"}, + {file = "regex-2024.5.15-cp311-cp311-win_amd64.whl", hash = "sha256:9e717956dcfd656f5055cc70996ee2cc82ac5149517fc8e1b60261b907740201"}, + {file = "regex-2024.5.15-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:632b01153e5248c134007209b5c6348a544ce96c46005d8456de1d552455b014"}, + {file = "regex-2024.5.15-cp312-cp312-macosx_10_9_x86_64.whl", hash = 
"sha256:e64198f6b856d48192bf921421fdd8ad8eb35e179086e99e99f711957ffedd6e"}, + {file = "regex-2024.5.15-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:68811ab14087b2f6e0fc0c2bae9ad689ea3584cad6917fc57be6a48bbd012c49"}, + {file = "regex-2024.5.15-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f8ec0c2fea1e886a19c3bee0cd19d862b3aa75dcdfb42ebe8ed30708df64687a"}, + {file = "regex-2024.5.15-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d0c0c0003c10f54a591d220997dd27d953cd9ccc1a7294b40a4be5312be8797b"}, + {file = "regex-2024.5.15-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2431b9e263af1953c55abbd3e2efca67ca80a3de8a0437cb58e2421f8184717a"}, + {file = "regex-2024.5.15-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a605586358893b483976cffc1723fb0f83e526e8f14c6e6614e75919d9862cf"}, + {file = "regex-2024.5.15-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:391d7f7f1e409d192dba8bcd42d3e4cf9e598f3979cdaed6ab11288da88cb9f2"}, + {file = "regex-2024.5.15-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9ff11639a8d98969c863d4617595eb5425fd12f7c5ef6621a4b74b71ed8726d5"}, + {file = "regex-2024.5.15-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:4eee78a04e6c67e8391edd4dad3279828dd66ac4b79570ec998e2155d2e59fd5"}, + {file = "regex-2024.5.15-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:8fe45aa3f4aa57faabbc9cb46a93363edd6197cbc43523daea044e9ff2fea83e"}, + {file = "regex-2024.5.15-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:d0a3d8d6acf0c78a1fff0e210d224b821081330b8524e3e2bc5a68ef6ab5803d"}, + {file = "regex-2024.5.15-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c486b4106066d502495b3025a0a7251bf37ea9540433940a23419461ab9f2a80"}, + {file = "regex-2024.5.15-cp312-cp312-win32.whl", hash = "sha256:c49e15eac7c149f3670b3e27f1f28a2c1ddeccd3a2812cba953e01be2ab9b5fe"}, + {file = 
"regex-2024.5.15-cp312-cp312-win_amd64.whl", hash = "sha256:673b5a6da4557b975c6c90198588181029c60793835ce02f497ea817ff647cb2"}, + {file = "regex-2024.5.15-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:87e2a9c29e672fc65523fb47a90d429b70ef72b901b4e4b1bd42387caf0d6835"}, + {file = "regex-2024.5.15-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c3bea0ba8b73b71b37ac833a7f3fd53825924165da6a924aec78c13032f20850"}, + {file = "regex-2024.5.15-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:bfc4f82cabe54f1e7f206fd3d30fda143f84a63fe7d64a81558d6e5f2e5aaba9"}, + {file = "regex-2024.5.15-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e5bb9425fe881d578aeca0b2b4b3d314ec88738706f66f219c194d67179337cb"}, + {file = "regex-2024.5.15-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:64c65783e96e563103d641760664125e91bd85d8e49566ee560ded4da0d3e704"}, + {file = "regex-2024.5.15-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cf2430df4148b08fb4324b848672514b1385ae3807651f3567871f130a728cc3"}, + {file = "regex-2024.5.15-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5397de3219a8b08ae9540c48f602996aa6b0b65d5a61683e233af8605c42b0f2"}, + {file = "regex-2024.5.15-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:455705d34b4154a80ead722f4f185b04c4237e8e8e33f265cd0798d0e44825fa"}, + {file = "regex-2024.5.15-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:b2b6f1b3bb6f640c1a92be3bbfbcb18657b125b99ecf141fb3310b5282c7d4ed"}, + {file = "regex-2024.5.15-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:3ad070b823ca5890cab606c940522d05d3d22395d432f4aaaf9d5b1653e47ced"}, + {file = "regex-2024.5.15-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:5b5467acbfc153847d5adb21e21e29847bcb5870e65c94c9206d20eb4e99a384"}, + {file = 
"regex-2024.5.15-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:e6662686aeb633ad65be2a42b4cb00178b3fbf7b91878f9446075c404ada552f"}, + {file = "regex-2024.5.15-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:2b4c884767504c0e2401babe8b5b7aea9148680d2e157fa28f01529d1f7fcf67"}, + {file = "regex-2024.5.15-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:3cd7874d57f13bf70078f1ff02b8b0aa48d5b9ed25fc48547516c6aba36f5741"}, + {file = "regex-2024.5.15-cp38-cp38-win32.whl", hash = "sha256:e4682f5ba31f475d58884045c1a97a860a007d44938c4c0895f41d64481edbc9"}, + {file = "regex-2024.5.15-cp38-cp38-win_amd64.whl", hash = "sha256:d99ceffa25ac45d150e30bd9ed14ec6039f2aad0ffa6bb87a5936f5782fc1569"}, + {file = "regex-2024.5.15-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:13cdaf31bed30a1e1c2453ef6015aa0983e1366fad2667657dbcac7b02f67133"}, + {file = "regex-2024.5.15-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:cac27dcaa821ca271855a32188aa61d12decb6fe45ffe3e722401fe61e323cd1"}, + {file = "regex-2024.5.15-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:7dbe2467273b875ea2de38ded4eba86cbcbc9a1a6d0aa11dcf7bd2e67859c435"}, + {file = "regex-2024.5.15-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:64f18a9a3513a99c4bef0e3efd4c4a5b11228b48aa80743be822b71e132ae4f5"}, + {file = "regex-2024.5.15-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d347a741ea871c2e278fde6c48f85136c96b8659b632fb57a7d1ce1872547600"}, + {file = "regex-2024.5.15-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1878b8301ed011704aea4c806a3cadbd76f84dece1ec09cc9e4dc934cfa5d4da"}, + {file = "regex-2024.5.15-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4babf07ad476aaf7830d77000874d7611704a7fcf68c9c2ad151f5d94ae4bfc4"}, + {file = "regex-2024.5.15-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:35cb514e137cb3488bce23352af3e12fb0dbedd1ee6e60da053c69fb1b29cc6c"}, + {file = "regex-2024.5.15-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:cdd09d47c0b2efee9378679f8510ee6955d329424c659ab3c5e3a6edea696294"}, + {file = "regex-2024.5.15-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:72d7a99cd6b8f958e85fc6ca5b37c4303294954eac1376535b03c2a43eb72629"}, + {file = "regex-2024.5.15-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:a094801d379ab20c2135529948cb84d417a2169b9bdceda2a36f5f10977ebc16"}, + {file = "regex-2024.5.15-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:c0c18345010870e58238790a6779a1219b4d97bd2e77e1140e8ee5d14df071aa"}, + {file = "regex-2024.5.15-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:16093f563098448ff6b1fa68170e4acbef94e6b6a4e25e10eae8598bb1694b5d"}, + {file = "regex-2024.5.15-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:e38a7d4e8f633a33b4c7350fbd8bad3b70bf81439ac67ac38916c4a86b465456"}, + {file = "regex-2024.5.15-cp39-cp39-win32.whl", hash = "sha256:71a455a3c584a88f654b64feccc1e25876066c4f5ef26cd6dd711308aa538694"}, + {file = "regex-2024.5.15-cp39-cp39-win_amd64.whl", hash = "sha256:cab12877a9bdafde5500206d1020a584355a97884dfd388af3699e9137bf7388"}, + {file = "regex-2024.5.15.tar.gz", hash = "sha256:d3ee02d9e5f482cc8309134a91eeaacbdd2261ba111b0fef3748eeb4913e6a2c"}, +] + [[package]] name = "requests" version = "2.32.3" @@ -2928,6 +4327,57 @@ urllib3 = ">=1.21.1,<3" socks = ["PySocks (>=1.5.6,!=1.5.7)"] use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] +[[package]] +name = "responses" +version = "0.25.3" +description = "A utility library for mocking out the `requests` Python library." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "responses-0.25.3-py3-none-any.whl", hash = "sha256:521efcbc82081ab8daa588e08f7e8a64ce79b91c39f6e62199b19159bea7dbcb"}, + {file = "responses-0.25.3.tar.gz", hash = "sha256:617b9247abd9ae28313d57a75880422d55ec63c29d33d629697590a034358dba"}, +] + +[package.dependencies] +pyyaml = "*" +requests = ">=2.30.0,<3.0" +urllib3 = ">=1.25.10,<3.0" + +[package.extras] +tests = ["coverage (>=6.0.0)", "flake8", "mypy", "pytest (>=7.0.0)", "pytest-asyncio", "pytest-cov", "pytest-httpserver", "tomli", "tomli-w", "types-PyYAML", "types-requests"] + +[[package]] +name = "rfc3339-validator" +version = "0.1.4" +description = "A pure python RFC3339 validator" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +files = [ + {file = "rfc3339_validator-0.1.4-py2.py3-none-any.whl", hash = "sha256:24f6ec1eda14ef823da9e36ec7113124b39c04d50a4d3d3a3c2859577e7791fa"}, + {file = "rfc3339_validator-0.1.4.tar.gz", hash = "sha256:138a2abdf93304ad60530167e51d2dfb9549521a836871b88d7f4695d0022f6b"}, +] + +[package.dependencies] +six = "*" + +[[package]] +name = "rich" +version = "13.7.1" +description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "rich-13.7.1-py3-none-any.whl", hash = "sha256:4edbae314f59eb482f54e9e30bf00d33350aaa94f4bfcd4e9e3110e64d0d7222"}, + {file = "rich-13.7.1.tar.gz", hash = "sha256:9be308cb1fe2f1f57d67ce99e95af38a1e2bc71ad9813b0e247cf7ffbcc3a432"}, +] + +[package.dependencies] +markdown-it-py = ">=2.2.0" +pygments = ">=2.13.0,<3.0.0" + +[package.extras] +jupyter = ["ipywidgets (>=7.5.1,<9)"] + [[package]] name = "rpds-py" version = "0.18.1" @@ -3038,45 +4488,46 @@ files = [ [[package]] name = "ruff" -version = "0.4.8" +version = "0.5.0" description = "An extremely fast Python linter and code formatter, written in Rust." 
optional = false python-versions = ">=3.7" files = [ - {file = "ruff-0.4.8-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:7663a6d78f6adb0eab270fa9cf1ff2d28618ca3a652b60f2a234d92b9ec89066"}, - {file = "ruff-0.4.8-py3-none-macosx_11_0_arm64.whl", hash = "sha256:eeceb78da8afb6de0ddada93112869852d04f1cd0f6b80fe464fd4e35c330913"}, - {file = "ruff-0.4.8-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aad360893e92486662ef3be0a339c5ca3c1b109e0134fcd37d534d4be9fb8de3"}, - {file = "ruff-0.4.8-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:284c2e3f3396fb05f5f803c9fffb53ebbe09a3ebe7dda2929ed8d73ded736deb"}, - {file = "ruff-0.4.8-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a7354f921e3fbe04d2a62d46707e569f9315e1a613307f7311a935743c51a764"}, - {file = "ruff-0.4.8-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:72584676164e15a68a15778fd1b17c28a519e7a0622161eb2debdcdabdc71883"}, - {file = "ruff-0.4.8-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9678d5c9b43315f323af2233a04d747409d1e3aa6789620083a82d1066a35199"}, - {file = "ruff-0.4.8-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704977a658131651a22b5ebeb28b717ef42ac6ee3b11e91dc87b633b5d83142b"}, - {file = "ruff-0.4.8-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d05f8d6f0c3cce5026cecd83b7a143dcad503045857bc49662f736437380ad45"}, - {file = "ruff-0.4.8-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:6ea874950daca5697309d976c9afba830d3bf0ed66887481d6bca1673fc5b66a"}, - {file = "ruff-0.4.8-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:fc95aac2943ddf360376be9aa3107c8cf9640083940a8c5bd824be692d2216dc"}, - {file = "ruff-0.4.8-py3-none-musllinux_1_2_i686.whl", hash = "sha256:384154a1c3f4bf537bac69f33720957ee49ac8d484bfc91720cc94172026ceed"}, - {file = "ruff-0.4.8-py3-none-musllinux_1_2_x86_64.whl", hash = 
"sha256:e9d5ce97cacc99878aa0d084c626a15cd21e6b3d53fd6f9112b7fc485918e1fa"}, - {file = "ruff-0.4.8-py3-none-win32.whl", hash = "sha256:6d795d7639212c2dfd01991259460101c22aabf420d9b943f153ab9d9706e6a9"}, - {file = "ruff-0.4.8-py3-none-win_amd64.whl", hash = "sha256:e14a3a095d07560a9d6769a72f781d73259655919d9b396c650fc98a8157555d"}, - {file = "ruff-0.4.8-py3-none-win_arm64.whl", hash = "sha256:14019a06dbe29b608f6b7cbcec300e3170a8d86efaddb7b23405cb7f7dcaf780"}, - {file = "ruff-0.4.8.tar.gz", hash = "sha256:16d717b1d57b2e2fd68bd0bf80fb43931b79d05a7131aa477d66fc40fbd86268"}, + {file = "ruff-0.5.0-py3-none-linux_armv6l.whl", hash = "sha256:ee770ea8ab38918f34e7560a597cc0a8c9a193aaa01bfbd879ef43cb06bd9c4c"}, + {file = "ruff-0.5.0-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:38f3b8327b3cb43474559d435f5fa65dacf723351c159ed0dc567f7ab735d1b6"}, + {file = "ruff-0.5.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:7594f8df5404a5c5c8f64b8311169879f6cf42142da644c7e0ba3c3f14130370"}, + {file = "ruff-0.5.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:adc7012d6ec85032bc4e9065110df205752d64010bed5f958d25dbee9ce35de3"}, + {file = "ruff-0.5.0-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d505fb93b0fabef974b168d9b27c3960714d2ecda24b6ffa6a87ac432905ea38"}, + {file = "ruff-0.5.0-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9dc5cfd3558f14513ed0d5b70ce531e28ea81a8a3b1b07f0f48421a3d9e7d80a"}, + {file = "ruff-0.5.0-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:db3ca35265de239a1176d56a464b51557fce41095c37d6c406e658cf80bbb362"}, + {file = "ruff-0.5.0-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b1a321c4f68809fddd9b282fab6a8d8db796b270fff44722589a8b946925a2a8"}, + {file = "ruff-0.5.0-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2c4dfcd8d34b143916994b3876b63d53f56724c03f8c1a33a253b7b1e6bf2a7d"}, + {file = 
"ruff-0.5.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:81e5facfc9f4a674c6a78c64d38becfbd5e4f739c31fcd9ce44c849f1fad9e4c"}, + {file = "ruff-0.5.0-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e589e27971c2a3efff3fadafb16e5aef7ff93250f0134ec4b52052b673cf988d"}, + {file = "ruff-0.5.0-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:d2ffbc3715a52b037bcb0f6ff524a9367f642cdc5817944f6af5479bbb2eb50e"}, + {file = "ruff-0.5.0-py3-none-musllinux_1_2_i686.whl", hash = "sha256:cd096e23c6a4f9c819525a437fa0a99d1c67a1b6bb30948d46f33afbc53596cf"}, + {file = "ruff-0.5.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:46e193b36f2255729ad34a49c9a997d506e58f08555366b2108783b3064a0e1e"}, + {file = "ruff-0.5.0-py3-none-win32.whl", hash = "sha256:49141d267100f5ceff541b4e06552e98527870eafa1acc9dec9139c9ec5af64c"}, + {file = "ruff-0.5.0-py3-none-win_amd64.whl", hash = "sha256:e9118f60091047444c1b90952736ee7b1792910cab56e9b9a9ac20af94cd0440"}, + {file = "ruff-0.5.0-py3-none-win_arm64.whl", hash = "sha256:ed5c4df5c1fb4518abcb57725b576659542bdbe93366f4f329e8f398c4b71178"}, + {file = "ruff-0.5.0.tar.gz", hash = "sha256:eb641b5873492cf9bd45bc9c5ae5320648218e04386a5f0c264ad6ccce8226a1"}, ] [[package]] name = "s3fs" -version = "2024.6.0" +version = "2024.6.1" description = "Convenient Filesystem interface over S3" optional = false python-versions = ">=3.8" files = [ - {file = "s3fs-2024.6.0-py3-none-any.whl", hash = "sha256:8d5f591956a61c7d64097eff4847598826f09d60b4ce9a16202565693569f6d4"}, - {file = "s3fs-2024.6.0.tar.gz", hash = "sha256:a59020ededc61e9666f1e473ce4aa28764e5f7b3c97414beb15cd9be522a87b6"}, + {file = "s3fs-2024.6.1-py3-none-any.whl", hash = "sha256:ecd20863437409eec1cbfff0b7df5e9772cf7c1926008efab2e17e46f6d52c63"}, + {file = "s3fs-2024.6.1.tar.gz", hash = "sha256:6c2106d6c34fbfbb88e3d20c6f3572896d5ee3d3512896696301c21a3c541bea"}, ] [package.dependencies] aiobotocore = ">=2.5.4,<3.0.0" aiohttp = "<4.0.0a0 || >4.0.0a0,<4.0.0a1 || >4.0.0a1" 
-fsspec = "==2024.6.0.*" +fsspec = "==2024.6.1.*" [package.extras] awscli = ["aiobotocore[awscli] (>=2.5.4,<3.0.0)"] @@ -3084,13 +4535,13 @@ boto3 = ["aiobotocore[boto3] (>=2.5.4,<3.0.0)"] [[package]] name = "s3transfer" -version = "0.10.1" +version = "0.10.2" description = "An Amazon S3 Transfer Manager" optional = false -python-versions = ">= 3.8" +python-versions = ">=3.8" files = [ - {file = "s3transfer-0.10.1-py3-none-any.whl", hash = "sha256:ceb252b11bcf87080fb7850a224fb6e05c8a776bab8f2b64b7f25b969464839d"}, - {file = "s3transfer-0.10.1.tar.gz", hash = "sha256:5683916b4c724f799e600f41dd9e10a9ff19871bf87623cc8f491cb4f5fa0a19"}, + {file = "s3transfer-0.10.2-py3-none-any.whl", hash = "sha256:eca1c20de70a39daee580aef4986996620f365c4e0fda6a86100231d62f1bf69"}, + {file = "s3transfer-0.10.2.tar.gz", hash = "sha256:0711534e9356d3cc692fdde846b4a1e4b0cb6519971860796e6bc4c7aea00ef6"}, ] [package.dependencies] @@ -3101,45 +4552,45 @@ crt = ["botocore[crt] (>=1.33.2,<2.0a.0)"] [[package]] name = "scipy" -version = "1.13.1" +version = "1.14.0" description = "Fundamental algorithms for scientific computing in Python" optional = false -python-versions = ">=3.9" -files = [ - {file = "scipy-1.13.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:20335853b85e9a49ff7572ab453794298bcf0354d8068c5f6775a0eabf350aca"}, - {file = "scipy-1.13.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:d605e9c23906d1994f55ace80e0125c587f96c020037ea6aa98d01b4bd2e222f"}, - {file = "scipy-1.13.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cfa31f1def5c819b19ecc3a8b52d28ffdcc7ed52bb20c9a7589669dd3c250989"}, - {file = "scipy-1.13.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f26264b282b9da0952a024ae34710c2aff7d27480ee91a2e82b7b7073c24722f"}, - {file = "scipy-1.13.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:eccfa1906eacc02de42d70ef4aecea45415f5be17e72b61bafcfd329bdc52e94"}, - {file = 
"scipy-1.13.1-cp310-cp310-win_amd64.whl", hash = "sha256:2831f0dc9c5ea9edd6e51e6e769b655f08ec6db6e2e10f86ef39bd32eb11da54"}, - {file = "scipy-1.13.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:27e52b09c0d3a1d5b63e1105f24177e544a222b43611aaf5bc44d4a0979e32f9"}, - {file = "scipy-1.13.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:54f430b00f0133e2224c3ba42b805bfd0086fe488835effa33fa291561932326"}, - {file = "scipy-1.13.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e89369d27f9e7b0884ae559a3a956e77c02114cc60a6058b4e5011572eea9299"}, - {file = "scipy-1.13.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a78b4b3345f1b6f68a763c6e25c0c9a23a9fd0f39f5f3d200efe8feda560a5fa"}, - {file = "scipy-1.13.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:45484bee6d65633752c490404513b9ef02475b4284c4cfab0ef946def50b3f59"}, - {file = "scipy-1.13.1-cp311-cp311-win_amd64.whl", hash = "sha256:5713f62f781eebd8d597eb3f88b8bf9274e79eeabf63afb4a737abc6c84ad37b"}, - {file = "scipy-1.13.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5d72782f39716b2b3509cd7c33cdc08c96f2f4d2b06d51e52fb45a19ca0c86a1"}, - {file = "scipy-1.13.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:017367484ce5498445aade74b1d5ab377acdc65e27095155e448c88497755a5d"}, - {file = "scipy-1.13.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:949ae67db5fa78a86e8fa644b9a6b07252f449dcf74247108c50e1d20d2b4627"}, - {file = "scipy-1.13.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de3ade0e53bc1f21358aa74ff4830235d716211d7d077e340c7349bc3542e884"}, - {file = "scipy-1.13.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2ac65fb503dad64218c228e2dc2d0a0193f7904747db43014645ae139c8fad16"}, - {file = "scipy-1.13.1-cp312-cp312-win_amd64.whl", hash = "sha256:cdd7dacfb95fea358916410ec61bbc20440f7860333aee6d882bb8046264e949"}, - {file = "scipy-1.13.1-cp39-cp39-macosx_10_9_x86_64.whl", 
hash = "sha256:436bbb42a94a8aeef855d755ce5a465479c721e9d684de76bf61a62e7c2b81d5"}, - {file = "scipy-1.13.1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:8335549ebbca860c52bf3d02f80784e91a004b71b059e3eea9678ba994796a24"}, - {file = "scipy-1.13.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d533654b7d221a6a97304ab63c41c96473ff04459e404b83275b60aa8f4b7004"}, - {file = "scipy-1.13.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:637e98dcf185ba7f8e663e122ebf908c4702420477ae52a04f9908707456ba4d"}, - {file = "scipy-1.13.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a014c2b3697bde71724244f63de2476925596c24285c7a637364761f8710891c"}, - {file = "scipy-1.13.1-cp39-cp39-win_amd64.whl", hash = "sha256:392e4ec766654852c25ebad4f64e4e584cf19820b980bc04960bca0b0cd6eaa2"}, - {file = "scipy-1.13.1.tar.gz", hash = "sha256:095a87a0312b08dfd6a6155cbbd310a8c51800fc931b8c0b84003014b874ed3c"}, -] - -[package.dependencies] -numpy = ">=1.22.4,<2.3" - -[package.extras] -dev = ["cython-lint (>=0.12.2)", "doit (>=0.36.0)", "mypy", "pycodestyle", "pydevtool", "rich-click", "ruff", "types-psutil", "typing_extensions"] -doc = ["jupyterlite-pyodide-kernel", "jupyterlite-sphinx (>=0.12.0)", "jupytext", "matplotlib (>=3.5)", "myst-nb", "numpydoc", "pooch", "pydata-sphinx-theme (>=0.15.2)", "sphinx (>=5.0.0)", "sphinx-design (>=0.4.0)"] -test = ["array-api-strict", "asv", "gmpy2", "hypothesis (>=6.30)", "mpmath", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] +python-versions = ">=3.10" +files = [ + {file = "scipy-1.14.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7e911933d54ead4d557c02402710c2396529540b81dd554fc1ba270eb7308484"}, + {file = "scipy-1.14.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:687af0a35462402dd851726295c1a5ae5f987bd6e9026f52e9505994e2f84ef6"}, + {file = "scipy-1.14.0-cp310-cp310-macosx_14_0_arm64.whl", hash = 
"sha256:07e179dc0205a50721022344fb85074f772eadbda1e1b3eecdc483f8033709b7"}, + {file = "scipy-1.14.0-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:6a9c9a9b226d9a21e0a208bdb024c3982932e43811b62d202aaf1bb59af264b1"}, + {file = "scipy-1.14.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:076c27284c768b84a45dcf2e914d4000aac537da74236a0d45d82c6fa4b7b3c0"}, + {file = "scipy-1.14.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42470ea0195336df319741e230626b6225a740fd9dce9642ca13e98f667047c0"}, + {file = "scipy-1.14.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:176c6f0d0470a32f1b2efaf40c3d37a24876cebf447498a4cefb947a79c21e9d"}, + {file = "scipy-1.14.0-cp310-cp310-win_amd64.whl", hash = "sha256:ad36af9626d27a4326c8e884917b7ec321d8a1841cd6dacc67d2a9e90c2f0359"}, + {file = "scipy-1.14.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6d056a8709ccda6cf36cdd2eac597d13bc03dba38360f418560a93050c76a16e"}, + {file = "scipy-1.14.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:f0a50da861a7ec4573b7c716b2ebdcdf142b66b756a0d392c236ae568b3a93fb"}, + {file = "scipy-1.14.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:94c164a9e2498e68308e6e148646e486d979f7fcdb8b4cf34b5441894bdb9caf"}, + {file = "scipy-1.14.0-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:a7d46c3e0aea5c064e734c3eac5cf9eb1f8c4ceee756262f2c7327c4c2691c86"}, + {file = "scipy-1.14.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9eee2989868e274aae26125345584254d97c56194c072ed96cb433f32f692ed8"}, + {file = "scipy-1.14.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e3154691b9f7ed73778d746da2df67a19d046a6c8087c8b385bc4cdb2cfca74"}, + {file = "scipy-1.14.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c40003d880f39c11c1edbae8144e3813904b10514cd3d3d00c277ae996488cdb"}, + {file = "scipy-1.14.0-cp311-cp311-win_amd64.whl", hash = 
"sha256:5b083c8940028bb7e0b4172acafda6df762da1927b9091f9611b0bcd8676f2bc"}, + {file = "scipy-1.14.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:bff2438ea1330e06e53c424893ec0072640dac00f29c6a43a575cbae4c99b2b9"}, + {file = "scipy-1.14.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:bbc0471b5f22c11c389075d091d3885693fd3f5e9a54ce051b46308bc787e5d4"}, + {file = "scipy-1.14.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:64b2ff514a98cf2bb734a9f90d32dc89dc6ad4a4a36a312cd0d6327170339eb0"}, + {file = "scipy-1.14.0-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:7d3da42fbbbb860211a811782504f38ae7aaec9de8764a9bef6b262de7a2b50f"}, + {file = "scipy-1.14.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d91db2c41dd6c20646af280355d41dfa1ec7eead235642178bd57635a3f82209"}, + {file = "scipy-1.14.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a01cc03bcdc777c9da3cfdcc74b5a75caffb48a6c39c8450a9a05f82c4250a14"}, + {file = "scipy-1.14.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:65df4da3c12a2bb9ad52b86b4dcf46813e869afb006e58be0f516bc370165159"}, + {file = "scipy-1.14.0-cp312-cp312-win_amd64.whl", hash = "sha256:4c4161597c75043f7154238ef419c29a64ac4a7c889d588ea77690ac4d0d9b20"}, + {file = "scipy-1.14.0.tar.gz", hash = "sha256:b5923f48cb840380f9854339176ef21763118a7300a88203ccd0bdd26e58527b"}, +] + +[package.dependencies] +numpy = ">=1.23.5,<2.3" + +[package.extras] +dev = ["cython-lint (>=0.12.2)", "doit (>=0.36.0)", "mypy (==1.10.0)", "pycodestyle", "pydevtool", "rich-click", "ruff (>=0.0.292)", "types-psutil", "typing_extensions"] +doc = ["jupyterlite-pyodide-kernel", "jupyterlite-sphinx (>=0.13.1)", "jupytext", "matplotlib (>=3.5)", "myst-nb", "numpydoc", "pooch", "pydata-sphinx-theme (>=0.15.2)", "sphinx (>=5.0.0)", "sphinx-design (>=0.4.0)"] +test = ["Cython", "array-api-strict", "asv", "gmpy2", "hypothesis (>=6.30)", "meson", "mpmath", "ninja", "pooch", "pytest", "pytest-cov", 
"pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] [[package]] name = "seaborn" @@ -3164,18 +4615,18 @@ stats = ["scipy (>=1.7)", "statsmodels (>=0.12)"] [[package]] name = "setuptools" -version = "70.0.0" +version = "70.2.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "setuptools-70.0.0-py3-none-any.whl", hash = "sha256:54faa7f2e8d2d11bcd2c07bed282eef1046b5c080d1c32add737d7b5817b1ad4"}, - {file = "setuptools-70.0.0.tar.gz", hash = "sha256:f211a66637b8fa059bb28183da127d4e86396c991a942b028c6650d4319c3fd0"}, + {file = "setuptools-70.2.0-py3-none-any.whl", hash = "sha256:b8b8060bb426838fbe942479c90296ce976249451118ef566a5a0b7d8b78fb05"}, + {file = "setuptools-70.2.0.tar.gz", hash = "sha256:bd63e505105011b25c3c11f753f7e3b8465ea739efddaccef8f0efac2137bac1"}, ] [package.extras] -docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"] -testing = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "mypy (==1.9)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.1)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf", "pytest-ruff (>=0.2.1)", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] +doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", 
"sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"] +test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test", "mypy (==1.10.0)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf", "pytest-ruff (>=0.3.2)", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] [[package]] name = "shapely" @@ -3245,6 +4696,28 @@ files = [ {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, ] +[[package]] +name = "sniffio" +version = "1.3.1" +description = "Sniff out which async library your code is running under" +optional = false +python-versions = ">=3.7" +files = [ + {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"}, + {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, +] + +[[package]] +name = "snowballstemmer" +version = "2.2.0" +description = "This package provides 29 stemmers for 28 languages generated from Snowball algorithms." 
+optional = false +python-versions = "*" +files = [ + {file = "snowballstemmer-2.2.0-py2.py3-none-any.whl", hash = "sha256:c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a"}, + {file = "snowballstemmer-2.2.0.tar.gz", hash = "sha256:09b16deb8547d3412ad7b590689584cd0fe25ec8db3be37788be3810cbf19cb1"}, +] + [[package]] name = "sortedcontainers" version = "2.4.0" @@ -3256,6 +4729,149 @@ files = [ {file = "sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88"}, ] +[[package]] +name = "sphinx" +version = "7.3.7" +description = "Python documentation generator" +optional = false +python-versions = ">=3.9" +files = [ + {file = "sphinx-7.3.7-py3-none-any.whl", hash = "sha256:413f75440be4cacf328f580b4274ada4565fb2187d696a84970c23f77b64d8c3"}, + {file = "sphinx-7.3.7.tar.gz", hash = "sha256:a4a7db75ed37531c05002d56ed6948d4c42f473a36f46e1382b0bd76ca9627bc"}, +] + +[package.dependencies] +alabaster = ">=0.7.14,<0.8.0" +babel = ">=2.9" +colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""} +docutils = ">=0.18.1,<0.22" +imagesize = ">=1.3" +Jinja2 = ">=3.0" +packaging = ">=21.0" +Pygments = ">=2.14" +requests = ">=2.25.0" +snowballstemmer = ">=2.0" +sphinxcontrib-applehelp = "*" +sphinxcontrib-devhelp = "*" +sphinxcontrib-htmlhelp = ">=2.0.0" +sphinxcontrib-jsmath = "*" +sphinxcontrib-qthelp = "*" +sphinxcontrib-serializinghtml = ">=1.1.9" +tomli = {version = ">=2", markers = "python_version < \"3.11\""} + +[package.extras] +docs = ["sphinxcontrib-websupport"] +lint = ["flake8 (>=3.5.0)", "importlib_metadata", "mypy (==1.9.0)", "pytest (>=6.0)", "ruff (==0.3.7)", "sphinx-lint", "tomli", "types-docutils", "types-requests"] +test = ["cython (>=3.0)", "defusedxml (>=0.7.1)", "pytest (>=6.0)", "setuptools (>=67.0)"] + +[[package]] +name = "sphinxcontrib-applehelp" +version = "1.0.8" +description = "sphinxcontrib-applehelp is a Sphinx extension which outputs Apple help books" +optional = false 
+python-versions = ">=3.9" +files = [ + {file = "sphinxcontrib_applehelp-1.0.8-py3-none-any.whl", hash = "sha256:cb61eb0ec1b61f349e5cc36b2028e9e7ca765be05e49641c97241274753067b4"}, + {file = "sphinxcontrib_applehelp-1.0.8.tar.gz", hash = "sha256:c40a4f96f3776c4393d933412053962fac2b84f4c99a7982ba42e09576a70619"}, +] + +[package.extras] +lint = ["docutils-stubs", "flake8", "mypy"] +standalone = ["Sphinx (>=5)"] +test = ["pytest"] + +[[package]] +name = "sphinxcontrib-devhelp" +version = "1.0.6" +description = "sphinxcontrib-devhelp is a sphinx extension which outputs Devhelp documents" +optional = false +python-versions = ">=3.9" +files = [ + {file = "sphinxcontrib_devhelp-1.0.6-py3-none-any.whl", hash = "sha256:6485d09629944511c893fa11355bda18b742b83a2b181f9a009f7e500595c90f"}, + {file = "sphinxcontrib_devhelp-1.0.6.tar.gz", hash = "sha256:9893fd3f90506bc4b97bdb977ceb8fbd823989f4316b28c3841ec128544372d3"}, +] + +[package.extras] +lint = ["docutils-stubs", "flake8", "mypy"] +standalone = ["Sphinx (>=5)"] +test = ["pytest"] + +[[package]] +name = "sphinxcontrib-htmlhelp" +version = "2.0.5" +description = "sphinxcontrib-htmlhelp is a sphinx extension which renders HTML help files" +optional = false +python-versions = ">=3.9" +files = [ + {file = "sphinxcontrib_htmlhelp-2.0.5-py3-none-any.whl", hash = "sha256:393f04f112b4d2f53d93448d4bce35842f62b307ccdc549ec1585e950bc35e04"}, + {file = "sphinxcontrib_htmlhelp-2.0.5.tar.gz", hash = "sha256:0dc87637d5de53dd5eec3a6a01753b1ccf99494bd756aafecd74b4fa9e729015"}, +] + +[package.extras] +lint = ["docutils-stubs", "flake8", "mypy"] +standalone = ["Sphinx (>=5)"] +test = ["html5lib", "pytest"] + +[[package]] +name = "sphinxcontrib-jsmath" +version = "1.0.1" +description = "A sphinx extension which renders display math in HTML via JavaScript" +optional = false +python-versions = ">=3.5" +files = [ + {file = "sphinxcontrib-jsmath-1.0.1.tar.gz", hash = "sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8"}, + 
{file = "sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl", hash = "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178"}, +] + +[package.extras] +test = ["flake8", "mypy", "pytest"] + +[[package]] +name = "sphinxcontrib-qthelp" +version = "1.0.7" +description = "sphinxcontrib-qthelp is a sphinx extension which outputs QtHelp documents" +optional = false +python-versions = ">=3.9" +files = [ + {file = "sphinxcontrib_qthelp-1.0.7-py3-none-any.whl", hash = "sha256:e2ae3b5c492d58fcbd73281fbd27e34b8393ec34a073c792642cd8e529288182"}, + {file = "sphinxcontrib_qthelp-1.0.7.tar.gz", hash = "sha256:053dedc38823a80a7209a80860b16b722e9e0209e32fea98c90e4e6624588ed6"}, +] + +[package.extras] +lint = ["docutils-stubs", "flake8", "mypy"] +standalone = ["Sphinx (>=5)"] +test = ["pytest"] + +[[package]] +name = "sphinxcontrib-serializinghtml" +version = "1.1.10" +description = "sphinxcontrib-serializinghtml is a sphinx extension which outputs \"serialized\" HTML files (json and pickle)" +optional = false +python-versions = ">=3.9" +files = [ + {file = "sphinxcontrib_serializinghtml-1.1.10-py3-none-any.whl", hash = "sha256:326369b8df80a7d2d8d7f99aa5ac577f51ea51556ed974e7716cfd4fca3f6cb7"}, + {file = "sphinxcontrib_serializinghtml-1.1.10.tar.gz", hash = "sha256:93f3f5dc458b91b192fe10c397e324f262cf163d79f3282c158e8436a2c4511f"}, +] + +[package.extras] +lint = ["docutils-stubs", "flake8", "mypy"] +standalone = ["Sphinx (>=5)"] +test = ["pytest"] + +[[package]] +name = "sympy" +version = "1.12.1" +description = "Computer algebra system (CAS) in Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "sympy-1.12.1-py3-none-any.whl", hash = "sha256:9b2cbc7f1a640289430e13d2a56f02f867a1da0190f2f99d8968c2f74da0e515"}, + {file = "sympy-1.12.1.tar.gz", hash = "sha256:2877b03f998cd8c08f07cd0de5b767119cd3ef40d09f41c30d722f6686b0fb88"}, +] + +[package.dependencies] +mpmath = ">=1.1.0,<1.4.0" + [[package]] name = "tblib" version = "3.0.0" @@ -3267,6 +4883,17 @@ 
files = [ {file = "tblib-3.0.0.tar.gz", hash = "sha256:93622790a0a29e04f0346458face1e144dc4d32f493714c6c3dff82a4adb77e6"}, ] +[[package]] +name = "toml" +version = "0.10.2" +description = "Python Library for Tom's Obvious, Minimal Language" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, + {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, +] + [[package]] name = "tomli" version = "2.0.1" @@ -3326,13 +4953,13 @@ test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0, [[package]] name = "typing-extensions" -version = "4.12.1" +version = "4.12.2" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" files = [ - {file = "typing_extensions-4.12.1-py3-none-any.whl", hash = "sha256:6024b58b69089e5a89c347397254e35f1bf02a907728ec7fee9bf0fe837d203a"}, - {file = "typing_extensions-4.12.1.tar.gz", hash = "sha256:915f5e35ff76f56588223f15fdd5938f9a1cf9195c0de25130c627e4d597f6d1"}, + {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, + {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, ] [[package]] @@ -3348,13 +4975,13 @@ files = [ [[package]] name = "urllib3" -version = "2.2.1" +version = "2.2.2" description = "HTTP library with thread-safe connection pooling, file post, and more." 
optional = false python-versions = ">=3.8" files = [ - {file = "urllib3-2.2.1-py3-none-any.whl", hash = "sha256:450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d"}, - {file = "urllib3-2.2.1.tar.gz", hash = "sha256:d0570876c61ab9e520d776c38acbbb5b05a776d3f9ff98a5c8fd5162a444cf19"}, + {file = "urllib3-2.2.2-py3-none-any.whl", hash = "sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472"}, + {file = "urllib3-2.2.2.tar.gz", hash = "sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168"}, ] [package.extras] @@ -3365,13 +4992,13 @@ zstd = ["zstandard (>=0.18.0)"] [[package]] name = "virtualenv" -version = "20.26.2" +version = "20.26.3" description = "Virtual Python Environment builder" optional = false python-versions = ">=3.7" files = [ - {file = "virtualenv-20.26.2-py3-none-any.whl", hash = "sha256:a624db5e94f01ad993d476b9ee5346fdf7b9de43ccaee0e0197012dc838a0e9b"}, - {file = "virtualenv-20.26.2.tar.gz", hash = "sha256:82bf0f4eebbb78d36ddaee0283d43fe5736b53880b8a8cdcd37390a07ac3741c"}, + {file = "virtualenv-20.26.3-py3-none-any.whl", hash = "sha256:8cc4a31139e796e9a7de2cd5cf2489de1217193116a8fd42328f1bd65f434589"}, + {file = "virtualenv-20.26.3.tar.gz", hash = "sha256:4c43a2a236279d9ea36a0d76f98d84bd6ca94ac4e0f4a3b9d46d05e10fea542a"}, ] [package.dependencies] @@ -3394,6 +5021,48 @@ files = [ {file = "wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5"}, ] +[[package]] +name = "werkzeug" +version = "3.0.3" +description = "The comprehensive WSGI web application library." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "werkzeug-3.0.3-py3-none-any.whl", hash = "sha256:fc9645dc43e03e4d630d23143a04a7f947a9a3b5727cd535fdfe155a17cc48c8"}, + {file = "werkzeug-3.0.3.tar.gz", hash = "sha256:097e5bfda9f0aba8da6b8545146def481d06aa7d3266e7448e2cccf67dd8bd18"}, +] + +[package.dependencies] +MarkupSafe = ">=2.1.1" + +[package.extras] +watchdog = ["watchdog (>=2.3)"] + +[[package]] +name = "wheel" +version = "0.43.0" +description = "A built-package format for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "wheel-0.43.0-py3-none-any.whl", hash = "sha256:55c570405f142630c6b9f72fe09d9b67cf1477fcf543ae5b8dcb1f5b7377da81"}, + {file = "wheel-0.43.0.tar.gz", hash = "sha256:465ef92c69fa5c5da2d1cf8ac40559a8c940886afcef87dcf14b9470862f1d85"}, +] + +[package.extras] +test = ["pytest (>=6.0.0)", "setuptools (>=65)"] + +[[package]] +name = "widgetsnbextension" +version = "4.0.11" +description = "Jupyter interactive widgets for Jupyter Notebook" +optional = false +python-versions = ">=3.7" +files = [ + {file = "widgetsnbextension-4.0.11-py3-none-any.whl", hash = "sha256:55d4d6949d100e0d08b94948a42efc3ed6dfdc0e9468b2c4b128c9a2ce3a7a36"}, + {file = "widgetsnbextension-4.0.11.tar.gz", hash = "sha256:8b22a8f1910bfd188e596fe7fc05dcbd87e810c8a4ba010bdb3da86637398474"}, +] + [[package]] name = "wrapt" version = "1.16.0" @@ -3475,13 +5144,13 @@ files = [ [[package]] name = "xarray" -version = "2024.5.0" +version = "2024.6.0" description = "N-D labeled arrays and datasets in Python" optional = false python-versions = ">=3.9" files = [ - {file = "xarray-2024.5.0-py3-none-any.whl", hash = "sha256:7ddedfe2294a0ab00f02d0fbdcb9c6300ec589f3cf436a9c7b7b577a12cd9bcf"}, - {file = "xarray-2024.5.0.tar.gz", hash = "sha256:e0eb1cb265f265126795f388ed9591f3c752f2aca491f6c0576711fd15b708f2"}, + {file = "xarray-2024.6.0-py3-none-any.whl", hash = "sha256:721a7394e8ec3d592b2d8ebe21eed074ac077dc1bb1bd777ce00e41700b4866c"}, + {file = 
"xarray-2024.6.0.tar.gz", hash = "sha256:0b91e0bc4dc0296947947640fe31ec6e867ce258d2f7cbc10bedf4a6d68340c7"}, ] [package.dependencies] @@ -3521,6 +5190,17 @@ io = ["cftime", "fsspec", "h5netcdf", "netCDF4", "pooch", "pydap", "scipy", "zar parallel = ["dask[complete]"] viz = ["matplotlib", "nc-time-axis", "seaborn"] +[[package]] +name = "xmltodict" +version = "0.13.0" +description = "Makes working with XML feel like you are working with JSON" +optional = false +python-versions = ">=3.4" +files = [ + {file = "xmltodict-0.13.0-py2.py3-none-any.whl", hash = "sha256:aa89e8fd76320154a40d19a0df04a4695fb9dc5ba977cbb68ab3e4eb225e7852"}, + {file = "xmltodict-0.13.0.tar.gz", hash = "sha256:341595a488e3e01a85a9d8911d8912fd922ede5fecc4dce437eb4b6c8d037e56"}, +] + [[package]] name = "xyzservices" version = "2024.6.0" @@ -3685,4 +5365,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", [metadata] lock-version = "2.0" python-versions = "==3.10.14" -content-hash = "005ce2e63056b5eb351b45c73f010b493e51c6adb5490fa64ccbb075f6c5740d" +content-hash = "1264c2e84e01cd3b4e4b21ee672dfa3ddd47c3d2c9702b9c733a5d31e37a6590" diff --git a/pyproject.toml b/pyproject.toml index 6a659d0..e710f68 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,8 +28,8 @@ classifiers = [ [tool.poetry.dependencies] python = "==3.10.14" -netCDF4 = ">=1.6.5" -numpy = ">=1.26.4" +netCDF4 = "==1.6.5" # hardpinned because of bug https://github.com/Unidata/netcdf4-python/issues/1342 +numpy = "<2.0.0" pandas = ">=2.2.2" python-dateutil = ">=2.9.0" jsonschema = ">=4.22.0" @@ -42,14 +42,18 @@ rechunker = ">=0.5.2" s3fs = ">=2024.3.1" shapely = ">=2.0.4" xarray = { version = ">=2024.3.0", extras = ["complete"] } -zarr = ">=2.18.0" +zarr = ">=2.18.2" geopandas = ">=0.14.4" +coiled = ">=1.27.2" +dask = ">=2024.6.0" [tool.poetry.dev-dependencies] pytest = "^8.2.1" ipdb = "^0.13" ipython = "^7.5.3" - +moto = {version = ">=5.0.0", extras = ["ec2", "s3", "server", "all"]} # Add Moto with 
optional dependencies +fuzzywuzzy = ">=0.18.0" +sphinx = ">=7.3.7" #[tool.poetry.extras] #testing = ["pytest", "ipython", "ipdb"] @@ -66,9 +70,11 @@ anmn_aqualogger_to_parquet = "aodn_cloud_optimised.bin.anmn_aqualogger_to_parque ardc_wave_to_parquet = "aodn_cloud_optimised.bin.ardc_wave_to_parquet:main" argo_to_parquet = "aodn_cloud_optimised.bin.argo_to_parquet:main" gsla_nrt_to_zarr = "aodn_cloud_optimised.bin.gsla_nrt_to_zarr:main" +generic_cloud_optimised_creation = "aodn_cloud_optimised.bin.generic_cloud_optimised_creation:main" soop_xbt_nrt_to_parquet = "aodn_cloud_optimised.bin.soop_xbt_nrt_to_parquet:main" srs_oc_ljco_to_parquet = "aodn_cloud_optimised.bin.srs_oc_ljco_to_parquet:main" srs_l3s_1d_dn_to_zarr = "aodn_cloud_optimised.bin.srs_l3s_1d_dn_to_zarr:main" +srs_l3s_3d_dn_to_zarr = "aodn_cloud_optimised.bin.srs_l3s_3d_dn_to_zarr:main" #[tool.poetry.include] #data = ["aodn_cloud_optimised/config/*.json", "aodn_cloud_optimised/config/dataset/*.json"] @@ -77,6 +83,9 @@ srs_l3s_1d_dn_to_zarr = "aodn_cloud_optimised.bin.srs_l3s_1d_dn_to_zarr:main" pytest = "^8.2.1" coverage = "^7.5.3" pre-commit = "^3.7.1" +moto = {version = ">=5.0.0", extras = ["ec2", "s3", "server", "all"]} # Add Moto with optional dependencies +sphinx = ">=7.3.7" + [tool.pre_commit] version = "2.3.0" diff --git a/requirements.txt b/requirements.txt index 108df31..3929f0b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,61 +1,123 @@ -aiobotocore==2.13.0 ; python_full_version == "3.10.14" +aiobotocore==2.13.1 ; python_full_version == "3.10.14" aiohttp==3.9.5 ; python_full_version == "3.10.14" aioitertools==0.11.0 ; python_full_version == "3.10.14" aiosignal==1.3.1 ; python_full_version == "3.10.14" +anyio==4.4.0 ; python_full_version == "3.10.14" +appnope==0.1.4 ; sys_platform == "darwin" and python_full_version == "3.10.14" asciitree==0.3.3 ; python_full_version == "3.10.14" async-timeout==4.0.3 ; python_full_version == "3.10.14" attrs==23.2.0 ; python_full_version == "3.10.14" 
-bokeh==3.4.1 ; python_full_version == "3.10.14" -boto3==1.34.106 ; python_full_version == "3.10.14" -botocore==1.34.106 ; python_full_version == "3.10.14" +backcall==0.2.0 ; python_full_version == "3.10.14" +backoff==2.2.1 ; python_full_version == "3.10.14" +bcrypt==4.1.3 ; python_full_version == "3.10.14" +bokeh==3.4.2 ; python_full_version == "3.10.14" +boto3==1.34.131 ; python_full_version == "3.10.14" +botocore==1.34.131 ; python_full_version == "3.10.14" certifi==2024.6.2 ; python_full_version == "3.10.14" -cftime==1.6.3 ; python_full_version == "3.10.14" +cffi==1.16.0 ; python_full_version == "3.10.14" +cftime==1.6.4 ; python_full_version == "3.10.14" click==8.1.7 ; python_full_version == "3.10.14" cloudpickle==3.0.0 ; python_full_version == "3.10.14" -colorama==0.4.6 ; python_full_version == "3.10.14" and platform_system == "Windows" +coiled==1.34.0 ; python_full_version == "3.10.14" +colorama==0.4.6 ; python_full_version == "3.10.14" and (platform_system == "Windows" or sys_platform == "win32") +comm==0.2.2 ; python_full_version == "3.10.14" contourpy==1.2.1 ; python_full_version == "3.10.14" -dask[array,diagnostics]==2024.5.2 ; python_full_version == "3.10.14" +cryptography==42.0.8 ; python_full_version == "3.10.14" +dask==2024.6.2 ; python_full_version == "3.10.14" +dask[array,diagnostics]==2024.6.2 ; python_full_version == "3.10.14" +decorator==5.1.1 ; python_full_version == "3.10.14" +deprecated==1.2.14 ; python_full_version == "3.10.14" +distributed==2024.6.2 ; python_full_version == "3.10.14" +exceptiongroup==1.2.1 ; python_full_version == "3.10.14" +fabric==3.2.2 ; python_full_version == "3.10.14" fasteners==0.19 ; sys_platform != "emscripten" and python_full_version == "3.10.14" +filelock==3.15.4 ; python_full_version == "3.10.14" frozenlist==1.4.1 ; python_full_version == "3.10.14" -fsspec==2024.6.0 ; python_full_version == "3.10.14" +fsspec==2024.6.1 ; python_full_version == "3.10.14" +geopandas==1.0.1 ; python_full_version == "3.10.14" 
+gilknocker==0.4.1 ; python_full_version == "3.10.14" +h11==0.14.0 ; python_full_version == "3.10.14" +h2==4.1.0 ; python_full_version == "3.10.14" h5netcdf==1.3.0 ; python_full_version == "3.10.14" h5py==3.11.0 ; python_full_version == "3.10.14" +hpack==4.0.0 ; python_full_version == "3.10.14" +httpcore==1.0.5 ; python_full_version == "3.10.14" +httpx[http2]==0.27.0 ; python_full_version == "3.10.14" +hyperframe==6.0.1 ; python_full_version == "3.10.14" idna==3.7 ; python_full_version == "3.10.14" -importlib-metadata==7.1.0 ; python_full_version == "3.10.14" +importlib-metadata==8.0.0 ; python_full_version == "3.10.14" +invoke==2.2.0 ; python_full_version == "3.10.14" +ipython==7.34.0 ; python_full_version == "3.10.14" +ipywidgets==8.1.3 ; python_full_version == "3.10.14" +jedi==0.19.1 ; python_full_version == "3.10.14" jinja2==3.1.4 ; python_full_version == "3.10.14" jmespath==1.0.1 ; python_full_version == "3.10.14" +jsondiff==2.1.1 ; python_full_version == "3.10.14" jsonschema-specifications==2023.12.1 ; python_full_version == "3.10.14" jsonschema==4.22.0 ; python_full_version == "3.10.14" +jupyterlab-widgets==3.0.11 ; python_full_version == "3.10.14" locket==1.0.0 ; python_full_version == "3.10.14" +markdown-it-py==3.0.0 ; python_full_version == "3.10.14" markupsafe==2.1.5 ; python_full_version == "3.10.14" +matplotlib-inline==0.1.7 ; python_full_version == "3.10.14" +mdurl==0.1.2 ; python_full_version == "3.10.14" +msgpack==1.0.8 ; python_full_version == "3.10.14" multidict==6.0.5 ; python_full_version == "3.10.14" mypy-extensions==1.0.0 ; python_full_version == "3.10.14" netcdf4==1.6.5 ; python_full_version == "3.10.14" numcodecs==0.12.1 ; python_full_version == "3.10.14" numpy==1.26.4 ; python_full_version == "3.10.14" -packaging==24.0 ; python_full_version == "3.10.14" +packaging==24.1 ; python_full_version == "3.10.14" pandas==2.2.2 ; python_full_version == "3.10.14" +paramiko==3.4.0 ; python_full_version == "3.10.14" +parso==0.8.4 ; python_full_version 
== "3.10.14" partd==1.4.2 ; python_full_version == "3.10.14" -pillow==10.3.0 ; python_full_version == "3.10.14" +pexpect==4.9.0 ; sys_platform != "win32" and python_full_version == "3.10.14" +pickleshare==0.7.5 ; python_full_version == "3.10.14" +pillow==10.4.0 ; python_full_version == "3.10.14" +pip-requirements-parser==32.0.1 ; python_full_version == "3.10.14" +pip==24.1.1 ; python_full_version == "3.10.14" +prometheus-client==0.20.0 ; python_full_version == "3.10.14" +prompt-toolkit==3.0.47 ; python_full_version == "3.10.14" +psutil==6.0.0 ; python_full_version == "3.10.14" +ptyprocess==0.7.0 ; sys_platform != "win32" and python_full_version == "3.10.14" pyarrow==16.0.0 ; python_full_version == "3.10.14" +pycparser==2.22 ; python_full_version == "3.10.14" +pygments==2.18.0 ; python_full_version == "3.10.14" +pynacl==1.5.0 ; python_full_version == "3.10.14" +pyogrio==0.9.0 ; python_full_version == "3.10.14" +pyparsing==3.1.2 ; python_full_version == "3.10.14" +pyproj==3.6.1 ; python_full_version == "3.10.14" python-dateutil==2.9.0.post0 ; python_full_version == "3.10.14" pytz==2024.1 ; python_full_version == "3.10.14" pyyaml==6.0.1 ; python_full_version == "3.10.14" rechunker==0.5.2 ; python_full_version == "3.10.14" referencing==0.35.1 ; python_full_version == "3.10.14" +rich==13.7.1 ; python_full_version == "3.10.14" rpds-py==0.18.1 ; python_full_version == "3.10.14" -s3fs==2024.6.0 ; python_full_version == "3.10.14" -s3transfer==0.10.1 ; python_full_version == "3.10.14" -scipy==1.13.1 ; python_full_version == "3.10.14" +s3fs==2024.6.1 ; python_full_version == "3.10.14" +s3transfer==0.10.2 ; python_full_version == "3.10.14" +scipy==1.14.0 ; python_full_version == "3.10.14" +setuptools==70.2.0 ; python_full_version == "3.10.14" shapely==2.0.4 ; python_full_version == "3.10.14" six==1.16.0 ; python_full_version == "3.10.14" +sniffio==1.3.1 ; python_full_version == "3.10.14" +sortedcontainers==2.4.0 ; python_full_version == "3.10.14" +tblib==3.0.0 ; 
python_full_version == "3.10.14" +toml==0.10.2 ; python_full_version == "3.10.14" toolz==0.12.1 ; python_full_version == "3.10.14" -tornado==6.4 ; python_full_version == "3.10.14" +tornado==6.4.1 ; python_full_version == "3.10.14" +traitlets==5.14.3 ; python_full_version == "3.10.14" +typing-extensions==4.12.2 ; python_full_version == "3.10.14" tzdata==2024.1 ; python_full_version == "3.10.14" -urllib3==2.2.1 ; python_full_version == "3.10.14" +urllib3==2.2.2 ; python_full_version == "3.10.14" +wcwidth==0.2.13 ; python_full_version == "3.10.14" +wheel==0.43.0 ; python_full_version == "3.10.14" +widgetsnbextension==4.0.11 ; python_full_version == "3.10.14" wrapt==1.16.0 ; python_full_version == "3.10.14" -xarray[complete]==2024.5.0 ; python_full_version == "3.10.14" -xyzservices==2024.4.0 ; python_full_version == "3.10.14" +xarray[complete]==2024.6.0 ; python_full_version == "3.10.14" +xyzservices==2024.6.0 ; python_full_version == "3.10.14" yarl==1.9.4 ; python_full_version == "3.10.14" zarr==2.18.2 ; python_full_version == "3.10.14" -zipp==3.19.1 ; python_full_version == "3.10.14" +zict==3.0.0 ; python_full_version == "3.10.14" +zipp==3.19.2 ; python_full_version == "3.10.14" diff --git a/test_aodn_cloud_optimised/resources/IMOS_ACORN_V_20240101T000000Z_TURQ_FV01_1-hour-avg.nc b/test_aodn_cloud_optimised/resources/IMOS_ACORN_V_20240101T000000Z_TURQ_FV01_1-hour-avg.nc new file mode 100644 index 0000000..7b7c5fa Binary files /dev/null and b/test_aodn_cloud_optimised/resources/IMOS_ACORN_V_20240101T000000Z_TURQ_FV01_1-hour-avg.nc differ diff --git a/test_aodn_cloud_optimised/resources/IMOS_ACORN_V_20240101T010000Z_TURQ_FV01_1-hour-avg.nc b/test_aodn_cloud_optimised/resources/IMOS_ACORN_V_20240101T010000Z_TURQ_FV01_1-hour-avg.nc new file mode 100644 index 0000000..8fb8bc8 Binary files /dev/null and b/test_aodn_cloud_optimised/resources/IMOS_ACORN_V_20240101T010000Z_TURQ_FV01_1-hour-avg.nc differ diff --git 
a/test_aodn_cloud_optimised/resources/IMOS_ACORN_V_20240101T020000Z_TURQ_FV01_1-hour-avg.nc b/test_aodn_cloud_optimised/resources/IMOS_ACORN_V_20240101T020000Z_TURQ_FV01_1-hour-avg.nc new file mode 100644 index 0000000..bc20400 Binary files /dev/null and b/test_aodn_cloud_optimised/resources/IMOS_ACORN_V_20240101T020000Z_TURQ_FV01_1-hour-avg.nc differ diff --git a/test_aodn_cloud_optimised/resources/IMOS_ACORN_V_20240101T030000Z_TURQ_FV01_1-hour-avg.nc b/test_aodn_cloud_optimised/resources/IMOS_ACORN_V_20240101T030000Z_TURQ_FV01_1-hour-avg.nc new file mode 100644 index 0000000..e394921 Binary files /dev/null and b/test_aodn_cloud_optimised/resources/IMOS_ACORN_V_20240101T030000Z_TURQ_FV01_1-hour-avg.nc differ diff --git a/test_aodn_cloud_optimised/resources/IMOS_ANMN-NSW_CDSTZ_20210429T015500Z_SYD140_FV01_SYD140-2104-SBE37SM-RS232-128_END-20210812T011500Z_C-20210827T074819Z.schema b/test_aodn_cloud_optimised/resources/IMOS_ANMN-NSW_CDSTZ_20210429T015500Z_SYD140_FV01_SYD140-2104-SBE37SM-RS232-128_END-20210812T011500Z_C-20210827T074819Z.schema new file mode 100644 index 0000000..4104a5a --- /dev/null +++ b/test_aodn_cloud_optimised/resources/IMOS_ANMN-NSW_CDSTZ_20210429T015500Z_SYD140_FV01_SYD140-2104-SBE37SM-RS232-128_END-20210812T011500Z_C-20210827T074819Z.schema @@ -0,0 +1,230 @@ +{ + "TIME": { + "type": "timestamp[ns]", + "axis": "T", + "comment": "timeOffsetPP: TIME values and time_coverage_start/end global attributes have been applied the following offset : -10 hours.", + "long_name": "time", + "standard_name": "time", + "valid_max": 90000.0, + "valid_min": 0.0 + }, + "TIMESERIES": { + "type": "int32", + "cf_role": "timeseries_id", + "long_name": "unique_identifier_for_each_timeseries_feature_instance_in_this_file" + }, + "LATITUDE": { + "type": "double", + "axis": "Y", + "long_name": "latitude", + "reference_datum": "WGS84 geographic coordinate system", + "standard_name": "latitude", + "units": "degrees_north", + "valid_max": 90.0, + "valid_min": -90.0 + }, + 
"LONGITUDE": { + "type": "double", + "axis": "X", + "long_name": "longitude", + "reference_datum": "WGS84 geographic coordinate system", + "standard_name": "longitude", + "units": "degrees_east", + "valid_max": 180.0, + "valid_min": -180.0 + }, + "NOMINAL_DEPTH": { + "type": "float", + "axis": "Z", + "long_name": "nominal depth", + "positive": "down", + "reference_datum": "sea surface", + "standard_name": "depth", + "units": "m", + "valid_max": 12000.0, + "valid_min": -5.0 + }, + "CNDC": { + "type": "float", + "ancillary_variables": "CNDC_quality_control", + "long_name": "sea_water_electrical_conductivity", + "standard_name": "sea_water_electrical_conductivity", + "units": "S m-1", + "valid_max": 50000.0, + "valid_min": 0.0 + }, + "CNDC_quality_control": { + "type": "float", + "flag_meanings": "No_QC_performed Good_data Probably_good_data Bad_data_that_are_potentially_correctable Bad_data Value_changed Not_used Not_used Not_used Missing_value", + "flag_values": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9 + ], + "long_name": "quality flag for sea_water_electrical_conductivity", + "quality_control_conventions": "IMOS standard flags", + "quality_control_global": " ", + "quality_control_global_conventions": "Argo reference table 2a (see http://www.cmar.csiro.au/argo/dmqc/user_doc/QC_flags.html), applied on data in position only (between global attributes time_deployment_start and time_deployment_end)", + "standard_name": "sea_water_electrical_conductivity status_flag" + }, + "TEMP": { + "type": "float", + "ancillary_variables": "TEMP_quality_control", + "long_name": "sea_water_temperature", + "standard_name": "sea_water_temperature", + "units": "degrees_Celsius", + "valid_max": 40.0, + "valid_min": -2.5 + }, + "TEMP_quality_control": { + "type": "float", + "comment": "Data values at TIME from 2021/04/29 01:55:01 UTC to 2021/04/29 01:55:01 UTC manually flagged as Bad_data : Instrument instability", + "flag_meanings": "No_QC_performed Good_data Probably_good_data 
Bad_data_that_are_potentially_correctable Bad_data Value_changed Not_used Not_used Not_used Missing_value", + "flag_values": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9 + ], + "long_name": "quality flag for sea_water_temperature", + "quality_control_conventions": "IMOS standard flags", + "quality_control_global": "B", + "quality_control_global_conventions": "Argo reference table 2a (see http://www.cmar.csiro.au/argo/dmqc/user_doc/QC_flags.html), applied on data in position only (between global attributes time_deployment_start and time_deployment_end)", + "standard_name": "sea_water_temperature status_flag" + }, + "PSAL": { + "type": "float", + "ancillary_variables": "PSAL_quality_control", + "long_name": "sea_water_practical_salinity", + "standard_name": "sea_water_practical_salinity", + "units": "1", + "valid_max": 41.0, + "valid_min": 2.0 + }, + "PSAL_quality_control": { + "type": "float", + "comment": "Data values at TIME from 2021/05/24 03:45:01 UTC to 2021/05/24 03:45:01 UTC manually flagged as Bad_data : spike. 
Data values at TIME from 2021/04/29 01:55:01 UTC to 2021/04/29 01:55:01 UTC manually flagged as Bad_data : Instrument instability", + "flag_meanings": "No_QC_performed Good_data Probably_good_data Bad_data_that_are_potentially_correctable Bad_data Value_changed Not_used Not_used Not_used Missing_value", + "flag_values": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9 + ], + "long_name": "quality flag for sea_water_practical_salinity", + "quality_control_conventions": "IMOS standard flags", + "quality_control_global": "B", + "quality_control_global_conventions": "Argo reference table 2a (see http://www.cmar.csiro.au/argo/dmqc/user_doc/QC_flags.html), applied on data in position only (between global attributes time_deployment_start and time_deployment_end)", + "standard_name": "sea_water_practical_salinity status_flag" + }, + "PRES_REL": { + "type": "float", + "ancillary_variables": "PRES_REL_quality_control", + "applied_offset": -10.135296821594238, + "long_name": "sea_water_pressure_due_to_sea_water", + "standard_name": "sea_water_pressure_due_to_sea_water", + "units": "dbar", + "valid_max": 12000.0, + "valid_min": -15.0 + }, + "PRES_REL_quality_control": { + "type": "float", + "flag_meanings": "No_QC_performed Good_data Probably_good_data Bad_data_that_are_potentially_correctable Bad_data Value_changed Not_used Not_used Not_used Missing_value", + "flag_values": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9 + ], + "long_name": "quality flag for sea_water_pressure_due_to_sea_water", + "quality_control_conventions": "IMOS standard flags", + "quality_control_global": "A", + "quality_control_global_conventions": "Argo reference table 2a (see http://www.cmar.csiro.au/argo/dmqc/user_doc/QC_flags.html), applied on data in position only (between global attributes time_deployment_start and time_deployment_end)", + "standard_name": "sea_water_pressure_due_to_sea_water status_flag" + }, + "DEPTH": { + "type": "float", + "ancillary_variables": "DEPTH_quality_control", 
+ "long_name": "actual depth", + "positive": "down", + "reference_datum": "sea surface", + "standard_name": "depth", + "units": "m", + "valid_max": 12000.0, + "valid_min": -5.0 + }, + "DEPTH_quality_control": { + "type": "float", + "flag_meanings": "No_QC_performed Good_data Probably_good_data Bad_data_that_are_potentially_correctable Bad_data Value_changed Not_used Not_used Not_used Missing_value", + "flag_values": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9 + ], + "long_name": "quality flag for depth", + "quality_control_conventions": "IMOS standard flags", + "quality_control_global": "A", + "quality_control_global_conventions": "Argo reference table 2a (see http://www.cmar.csiro.au/argo/dmqc/user_doc/QC_flags.html), applied on data in position only (between global attributes time_deployment_start and time_deployment_end)", + "standard_name": "depth status_flag" + }, + "DENS": { + "type": "float", + "ancillary_variables": "DENS_quality_control", + "long_name": "sea_water_density", + "standard_name": "sea_water_density", + "units": "kg m-3" + }, + "DENS_quality_control": { + "type": "float", + "flag_meanings": "No_QC_performed Good_data Probably_good_data Bad_data_that_are_potentially_correctable Bad_data Value_changed Not_used Not_used Not_used Missing_value", + "flag_values": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9 + ], + "long_name": "quality flag for sea_water_density", + "quality_control_conventions": "IMOS standard flags", + "quality_control_global": " ", + "quality_control_global_conventions": "Argo reference table 2a (see http://www.cmar.csiro.au/argo/dmqc/user_doc/QC_flags.html), applied on data in position only (between global attributes time_deployment_start and time_deployment_end)", + "standard_name": "sea_water_density status_flag" + } +} \ No newline at end of file diff --git a/test_aodn_cloud_optimised/resources/aatams_acoustic_tagging.json b/test_aodn_cloud_optimised/resources/aatams_acoustic_tagging.json index ed4dc2d..da9b102 
100644 --- a/test_aodn_cloud_optimised/resources/aatams_acoustic_tagging.json +++ b/test_aodn_cloud_optimised/resources/aatams_acoustic_tagging.json @@ -2,6 +2,16 @@ "dataset_name": "aatams_acoustic_tagging", "logger_name": "aatams_acoustic_tagging", "cloud_optimised_format": "parquet", + "cluster_options" : { + "n_workers": [4, 20], + "scheduler_vm_types": "t3.small", + "worker_vm_types": "t3.large", + "allow_ingress_from": "me", + "compute_purchase_option": "spot_with_fallback", + "worker_options": { + "nthreads": 8, + "memory_limit": "32GB" } + }, "metadata_uuid": "4a97bd11-e821-4682-8b20-cb69201f3223", "gattrs_to_variables": [], "partition_keys": ["transmitter_id", "timestamp", "polygon"], diff --git a/test_aodn_cloud_optimised/resources/acorn_gridded_qc_main.json b/test_aodn_cloud_optimised/resources/acorn_gridded_qc_main.json new file mode 100644 index 0000000..3ba67de --- /dev/null +++ b/test_aodn_cloud_optimised/resources/acorn_gridded_qc_main.json @@ -0,0 +1,136 @@ +{ + "dataset_name": "acorn_gridded_qc", + "logger_name": "acorn_gridded_qc", + "cloud_optimised_format": "zarr", + "cluster_options" : { + "n_workers": [2, 8], + "scheduler_vm_types": "t3.small", + "worker_vm_types": "t3.medium", + "allow_ingress_from": "me", + "compute_purchase_option": "spot_with_fallback", + "worker_options": { + "nthreads": 8, + "memory_limit": "32GB" } + }, + "cluster_config" : { + "n_workers": [0, 6], + "scheduler_vm_types": "t3.medium" + }, + "metadata_uuid": "", + "dimensions": { + "time": {"name": "TIME", + "chunk": 1500, + "rechunk": true}, + "latitude": {"name": "J", + "chunk": 60}, + "longitude": {"name": "I", + "chunk": 59} + }, + "var_template_shape": "UCUR", + "vars_to_drop_no_common_dimension": ["I", "J", "LATITUDE", "LONGITUDE", "GDOP"], + "schema": { + "TIME": {"type": "datetime64[ns]"}, + "I": {"type": "int32"}, + "J": {"type": "int32"}, + "LATITUDE": {"type": "float64"}, + "LONGITUDE": {"type": "float64"}, + "GDOP": {"type": "float32"}, + "UCUR": {"type": 
"float32"}, + "VCUR": {"type": "float32"}, + "UCUR_sd": {"type": "float32"}, + "VCUR_sd": {"type": "float32"}, + "NOBS1": {"type": "float32"}, + "NOBS2": {"type": "float32"}, + "UCUR_quality_control": {"type": "float32"}, + "VCUR_quality_control": {"type": "float32"} + }, + "dataset_gattrs": { + "title": "Temperature logger" + }, + "aws_opendata_registry": { + "Name": "", + "Description": "", + "Documentation": "", + "Contact": "", + "ManagedBy": "", + "UpdateFrequency": "", + "Tags": [], + "License": "", + "Resources": [ + { + "Description": "", + "ARN": "", + "Region": "", + "Type": "", + "Explore": [] + }, + { + "Description": "", + "ARN": "", + "Region": "", + "Type": "" + }, + { + "Description": "", + "ARN": "", + "Region": "", + "Type": "" + }, + { + "Description": "", + "ARN": "", + "Region": "", + "Type": "" + } + ], + "DataAtWork": { + "Tutorials": [ + { + "Title": "", + "URL": "", + "Services": "", + "AuthorName": "", + "AuthorURL": "" + }, + { + "Title": "", + "URL": "", + "AuthorName": "", + "AuthorURL": "" + }, + { + "Title": "", + "URL": "", + "AuthorName": "", + "AuthorURL": "" + } + ], + "Tools & Applications": [ + { + "Title": "", + "URL": "", + "AuthorName": "", + "AuthorURL": "" + }, + { + "Title": "", + "URL": "", + "AuthorName": "", + "AuthorURL": "" + } + ], + "Publications": [ + { + "Title": "", + "URL": "", + "AuthorName": "" + }, + { + "Title": "", + "URL": "", + "AuthorName": "" + } + ] + } + } +} diff --git a/test_aodn_cloud_optimised/resources/acorn_gridded_qc_turq.json b/test_aodn_cloud_optimised/resources/acorn_gridded_qc_turq.json new file mode 100644 index 0000000..bd2c5f1 --- /dev/null +++ b/test_aodn_cloud_optimised/resources/acorn_gridded_qc_turq.json @@ -0,0 +1,6 @@ +{ + "dataset_name": "acorn_gridded_qc_turq", + "parent_config": "acorn_gridded_qc_main.json", + "logger_name": "acorn_gridded_qc_turq", + "metadata_uuid": "" +} diff --git a/test_aodn_cloud_optimised/resources/anmn_ctd_ts_fv01.json 
b/test_aodn_cloud_optimised/resources/anmn_ctd_ts_fv01.json new file mode 100644 index 0000000..7f80e5d --- /dev/null +++ b/test_aodn_cloud_optimised/resources/anmn_ctd_ts_fv01.json @@ -0,0 +1,365 @@ +{ + "dataset_name": "anmn_ctd_ts_fv01", + "logger_name": "anmn_ctd_ts_fv01", + "cloud_optimised_format": "parquet", + "cluster_options" : { + "n_workers": [8, 20], + "scheduler_vm_types": "t3.small", + "worker_vm_types": "t3.large", + "allow_ingress_from": "me", + "compute_purchase_option": "spot_with_fallback", + "worker_options": { + "nthreads": 8, + "memory_limit": "32GB" } + }, + "metadata_uuid": "7b901002-b1dc-46c3-89f2-b4951cedca48", + "gattrs_to_variables": [ + "site_code" + ], + "partition_keys": [ + "site_code", + "timestamp", + "polygon" + ], + "time_extent": { + "time": "TIME", + "partition_timestamp_period": "Q" + }, + "spatial_extent": { + "lat": "LATITUDE", + "lon": "LONGITUDE", + "spatial_resolution": 5 + }, + "schema": { + "TIME": { + "type": "timestamp[ns]", + "axis": "T", + "comment": "timeOffsetPP: TIME values and time_coverage_start/end global attributes have been applied the following offset : -10 hours.", + "long_name": "time", + "standard_name": "time", + "valid_max": 90000.0, + "valid_min": 0.0 + }, + "TIMESERIES": { + "type": "int32", + "cf_role": "timeseries_id", + "long_name": "unique_identifier_for_each_timeseries_feature_instance_in_this_file" + }, + "LATITUDE": { + "type": "double", + "axis": "Y", + "long_name": "latitude", + "reference_datum": "WGS84 geographic coordinate system", + "standard_name": "latitude", + "units": "degrees_north", + "valid_max": 90.0, + "valid_min": -90.0 + }, + "LONGITUDE": { + "type": "double", + "axis": "X", + "long_name": "longitude", + "reference_datum": "WGS84 geographic coordinate system", + "standard_name": "longitude", + "units": "degrees_east", + "valid_max": 180.0, + "valid_min": -180.0 + }, + "NOMINAL_DEPTH": { + "type": "float", + "axis": "Z", + "long_name": "nominal depth", + "positive": "down", + 
"reference_datum": "sea surface", + "standard_name": "depth", + "units": "m", + "valid_max": 12000.0, + "valid_min": -5.0 + }, + "CNDC": { + "type": "float", + "ancillary_variables": "CNDC_quality_control", + "long_name": "sea_water_electrical_conductivity", + "standard_name": "sea_water_electrical_conductivity", + "units": "S m-1", + "valid_max": 50000.0, + "valid_min": 0.0 + }, + "CNDC_quality_control": { + "type": "float", + "flag_meanings": "No_QC_performed Good_data Probably_good_data Bad_data_that_are_potentially_correctable Bad_data Value_changed Not_used Not_used Not_used Missing_value", + "flag_values": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9 + ], + "long_name": "quality flag for sea_water_electrical_conductivity", + "quality_control_conventions": "IMOS standard flags", + "quality_control_global": " ", + "quality_control_global_conventions": "Argo reference table 2a (see http://www.cmar.csiro.au/argo/dmqc/user_doc/QC_flags.html), applied on data in position only (between global attributes time_deployment_start and time_deployment_end)", + "standard_name": "sea_water_electrical_conductivity status_flag" + }, + "TEMP": { + "type": "float", + "ancillary_variables": "TEMP_quality_control", + "long_name": "sea_water_temperature", + "standard_name": "sea_water_temperature", + "units": "degrees_Celsius", + "valid_max": 40.0, + "valid_min": -2.5 + }, + "TEMP_quality_control": { + "type": "float", + "comment": "Data values at TIME from 2021/04/29 01:55:01 UTC to 2021/04/29 01:55:01 UTC manually flagged as Bad_data : Instrument instability", + "flag_meanings": "No_QC_performed Good_data Probably_good_data Bad_data_that_are_potentially_correctable Bad_data Value_changed Not_used Not_used Not_used Missing_value", + "flag_values": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9 + ], + "long_name": "quality flag for sea_water_temperature", + "quality_control_conventions": "IMOS standard flags", + "quality_control_global": "B", + 
"quality_control_global_conventions": "Argo reference table 2a (see http://www.cmar.csiro.au/argo/dmqc/user_doc/QC_flags.html), applied on data in position only (between global attributes time_deployment_start and time_deployment_end)", + "standard_name": "sea_water_temperature status_flag" + }, + "PSAL": { + "type": "float", + "ancillary_variables": "PSAL_quality_control", + "long_name": "sea_water_practical_salinity", + "standard_name": "sea_water_practical_salinity", + "units": "1", + "valid_max": 41.0, + "valid_min": 2.0 + }, + "PSAL_quality_control": { + "type": "float", + "comment": "Data values at TIME from 2021/05/24 03:45:01 UTC to 2021/05/24 03:45:01 UTC manually flagged as Bad_data : spike. Data values at TIME from 2021/04/29 01:55:01 UTC to 2021/04/29 01:55:01 UTC manually flagged as Bad_data : Instrument instability", + "flag_meanings": "No_QC_performed Good_data Probably_good_data Bad_data_that_are_potentially_correctable Bad_data Value_changed Not_used Not_used Not_used Missing_value", + "flag_values": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9 + ], + "long_name": "quality flag for sea_water_practical_salinity", + "quality_control_conventions": "IMOS standard flags", + "quality_control_global": "B", + "quality_control_global_conventions": "Argo reference table 2a (see http://www.cmar.csiro.au/argo/dmqc/user_doc/QC_flags.html), applied on data in position only (between global attributes time_deployment_start and time_deployment_end)", + "standard_name": "sea_water_practical_salinity status_flag" + }, + "PRES_REL": { + "type": "float", + "ancillary_variables": "PRES_REL_quality_control", + "applied_offset": -10.135296821594238, + "long_name": "sea_water_pressure_due_to_sea_water", + "standard_name": "sea_water_pressure_due_to_sea_water", + "units": "dbar", + "valid_max": 12000.0, + "valid_min": -15.0 + }, + "PRES_REL_quality_control": { + "type": "float", + "flag_meanings": "No_QC_performed Good_data Probably_good_data 
Bad_data_that_are_potentially_correctable Bad_data Value_changed Not_used Not_used Not_used Missing_value", + "flag_values": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9 + ], + "long_name": "quality flag for sea_water_pressure_due_to_sea_water", + "quality_control_conventions": "IMOS standard flags", + "quality_control_global": "A", + "quality_control_global_conventions": "Argo reference table 2a (see http://www.cmar.csiro.au/argo/dmqc/user_doc/QC_flags.html), applied on data in position only (between global attributes time_deployment_start and time_deployment_end)", + "standard_name": "sea_water_pressure_due_to_sea_water status_flag" + }, + "DEPTH": { + "type": "float", + "ancillary_variables": "DEPTH_quality_control", + "long_name": "actual depth", + "positive": "down", + "reference_datum": "sea surface", + "standard_name": "depth", + "units": "m", + "valid_max": 12000.0, + "valid_min": -5.0 + }, + "DEPTH_quality_control": { + "type": "float", + "flag_meanings": "No_QC_performed Good_data Probably_good_data Bad_data_that_are_potentially_correctable Bad_data Value_changed Not_used Not_used Not_used Missing_value", + "flag_values": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9 + ], + "long_name": "quality flag for depth", + "quality_control_conventions": "IMOS standard flags", + "quality_control_global": "A", + "quality_control_global_conventions": "Argo reference table 2a (see http://www.cmar.csiro.au/argo/dmqc/user_doc/QC_flags.html), applied on data in position only (between global attributes time_deployment_start and time_deployment_end)", + "standard_name": "depth status_flag" + }, + "DENS": { + "type": "float", + "ancillary_variables": "DENS_quality_control", + "long_name": "sea_water_density", + "standard_name": "sea_water_density", + "units": "kg m-3" + }, + "DENS_quality_control": { + "type": "float", + "flag_meanings": "No_QC_performed Good_data Probably_good_data Bad_data_that_are_potentially_correctable Bad_data Value_changed Not_used 
Not_used Not_used Missing_value", + "flag_values": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9 + ], + "long_name": "quality flag for sea_water_density", + "quality_control_conventions": "IMOS standard flags", + "quality_control_global": " ", + "quality_control_global_conventions": "Argo reference table 2a (see http://www.cmar.csiro.au/argo/dmqc/user_doc/QC_flags.html), applied on data in position only (between global attributes time_deployment_start and time_deployment_end)", + "standard_name": "sea_water_density status_flag" + }, + "timestamp": { + "type": "int64" + }, + "polygon": { + "type": "string" + }, + "site_code": { + "type": "string" + }, + "filename": { + "type": "string" + } + }, + "dataset_gattrs": { + "title": "ANMN CTD timeseries" + }, + "force_old_pq_del": true, + "aws_opendata_registry": { + "Name": "ANMN CTD timeseries", + "Description": "", + "Documentation": "https://catalogue.aodn.org.au/geonetwork/srv/eng/catalog.search#/metadata/7b901002-b1dc-46c3-89f2-b4951cedca48", + "Contact": "", + "ManagedBy": "", + "UpdateFrequency": "", + "Tags": [], + "License": "", + "Resources": [ + { + "Description": "", + "ARN": "", + "Region": "", + "Type": "", + "Explore": [] + }, + { + "Description": "", + "ARN": "", + "Region": "", + "Type": "" + }, + { + "Description": "", + "ARN": "", + "Region": "", + "Type": "" + }, + { + "Description": "", + "ARN": "", + "Region": "", + "Type": "" + } + ], + "DataAtWork": { + "Tutorials": [ + { + "Title": "", + "URL": "", + "Services": "", + "AuthorName": "", + "AuthorURL": "" + }, + { + "Title": "", + "URL": "", + "AuthorName": "", + "AuthorURL": "" + }, + { + "Title": "", + "URL": "", + "AuthorName": "", + "AuthorURL": "" + } + ], + "Tools & Applications": [ + { + "Title": "", + "URL": "", + "AuthorName": "", + "AuthorURL": "" + }, + { + "Title": "", + "URL": "", + "AuthorName": "", + "AuthorURL": "" + } + ], + "Publications": [ + { + "Title": "IMOS - Australian National Mooring Network (ANMN) - CTD Profiles", + 
"URL": "https://catalogue.aodn.org.au/geonetwork/srv/eng/catalog.search#/metadata/7b901002-b1dc-46c3-89f2-b4951cedca48", + "AuthorName": "" + }, + { + "Title": "", + "URL": "", + "AuthorName": "" + } + ] + } + } +} diff --git a/test_aodn_cloud_optimised/resources/ardc_wave_nrt.json b/test_aodn_cloud_optimised/resources/ardc_wave_nrt.json index ff77ea4..ae21e4f 100644 --- a/test_aodn_cloud_optimised/resources/ardc_wave_nrt.json +++ b/test_aodn_cloud_optimised/resources/ardc_wave_nrt.json @@ -1,126 +1,291 @@ { - "dataset_name": "ardc_wave_nrt", - "logger_name": "ardc_wave_nrt", - "cloud_optimised_format": "parquet", - "metadata_uuid": "2807f3aa-4db0-4924-b64b-354ae8c10b58", - "gattrs_to_variables" : ["site_name", "water_depth", "wmo_id"], - "partition_keys": ["site_name", "timestamp", "polygon"], - "time_extent": { - "time": "TIME", - "partition_timestamp_period": "M" - }, - "spatial_extent": { - "lat": "LATITUDE", - "lon": "LONGITUDE", - "spatial_resolution": 5 - }, - "schema" : { - "timeSeries": {"type": "int32"}, - "LATITUDE": {"type": "double"}, - "LONGITUDE": {"type": "double"}, - "WHTH": {"type": "float"}, - "WPMH": {"type": "float"}, - "WMXH": {"type": "float"}, - "WPPE": {"type": "float"}, - "WPDI": {"type": "float"}, - "WPDS": {"type": "float"}, - "WAVE_quality_control": {"type": "float"}, - "water_depth": {"type": "int64"}, - "wmo_id": {"type": "string"}, - "timestamp": {"type": "int64"}, - "polygon": {"type": "string"}, - "site_name": {"type": "string"}, - "filename": {"type": "string"}, - "TIME": {"type": "timestamp[ns]"} - }, - "dataset_gattrs": { - "title": "ARDC" - }, - "force_old_pq_del": true, - "aws_opendata_registry": { - "Name": "", - "Description": "", - "Documentation": "", - "Contact": "", - "ManagedBy": "", - "UpdateFrequency": "", - "Tags": [], - "License": "", - "Resources": [ + "dataset_name": "ardc_wave_nrt", + "logger_name": "ardc_wave_nrt", + "cloud_optimised_format": "parquet", + "cluster_options" : { + "n_workers": [8, 20], + 
"scheduler_vm_types": "t3.small", + "worker_vm_types": "t3.large", + "allow_ingress_from": "me", + "compute_purchase_option": "spot_with_fallback", + "worker_options": { + "nthreads": 8, + "memory_limit": "32GB" } + }, + "metadata_uuid": "2807f3aa-4db0-4924-b64b-354ae8c10b58", + "gattrs_to_variables": [ + "site_name", + "water_depth", + "wmo_id" + ], + "partition_keys": [ + "site_name", + "timestamp", + "polygon" + ], + "time_extent": { + "time": "TIME", + "partition_timestamp_period": "M" + }, + "spatial_extent": { + "lat": "LATITUDE", + "lon": "LONGITUDE", + "spatial_resolution": 5 + }, + "schema": { + "timeSeries": { + "type": "int32", + "long_name": "unique identifier for each feature instance", + "cf_role": "timeseries_id" + }, + "TIME": { + "type": "timestamp[ns]", + "standard_name": "time", + "long_name": "time", + "axis": "T", + "valid_min": 0.0, + "valid_max": 90000.0 + }, + "LATITUDE": { + "type": "double", + "standard_name": "latitude", + "long_name": "latitude", + "units": "degrees_north", + "axis": "Y", + "valid_min": -90.0, + "valid_max": 90.0, + "reference_datum": "WGS84 coordinate reference system; EPSG:4326" + }, + "LONGITUDE": { + "type": "double", + "standard_name": "longitude", + "long_name": "longitude", + "units": "degrees_east", + "axis": "X", + "valid_min": -180.0, + "valid_max": 180.0, + "reference_datum": "WGS84 coordinate reference system; EPSG:4326" + }, + "WPMH": { + "type": "double", + "standard_name": "sea_surface_wave_mean_period", + "long_name": "sea surface wave mean period", + "units": "s", + "valid_min": 0.0, + "valid_max": 50.0, + "method": "Time domain analysis", + "ancillary_variable": "WAVE_quality_control" + }, + "WMXH": { + "type": "double", + "standard_name": "sea_surface_wave_maximum_height", + "long_name": "sea surface wave maximum height", + "units": "m", + "valid_min": 0.0, + "valid_max": 100.0, + "method": "Time domain analysis", + "ancillary_variable": "WAVE_quality_control" + }, + "WPPE": { + "type": "double", + 
"standard_name": "sea_surface_wave_period_at_variance_spectral_density_maximum", + "long_name": "spectral peak wave period", + "units": "s", + "valid_min": 0.0, + "valid_max": 50.0, + "method": "Spectral analysis method", + "ancillary_variable": "WAVE_quality_control" + }, + "WPDI": { + "type": "double", + "standard_name": "sea_surface_wave_from_direction_at_variance_spectral_density_maximum", + "long_name": "direction of the dominant wave", + "units": "degree", + "reference_datum": "true north", + "valid_min": 0.0, + "valid_max": 360.0, + "method": "Spectral analysis method", + "ancillary_variable": "WAVE_quality_control" + }, + "WPDS": { + "type": "double", + "standard_name": "sea_surface_wave_directional_spread_at_variance_spectral_density_maximum", + "long_name": "directional spread of the dominant wave", + "units": "degree", + "reference_datum": "true north", + "valid_min": 0.0, + "valid_max": 360.0, + "method": "Spectral analysis method", + "ancillary_variable": "WAVE_quality_control" + }, + "WAVE_quality_control": { + "type": "float", + "long_name": "primary Quality Control flag for wave variables", + "valid_min": 1, + "valid_max": 9, + "flag_values": [ + 1, + 2, + 3, + 4, + 9 + ], + "flag_meanings": "good not_evaluated questionable bad missing", + "quality_control_convention": "Ocean Data Standards, UNESCO 2013 - IOC Manuals and Guides, 54, Volume 3 Version 1" + }, + "WSSH": { + "type": "double", + "ancillary_variable": "WAVE_quality_control", + "long_name": "sea surface wave spectral significant height", + "method": "Spectral analysis method", + "standard_name": "sea_surface_wave_significant_height", + "units": "m", + "valid_max": 100.0, + "valid_min": 0.0 + }, + "WPFM": { + "type": "double", + "ancillary_variable": "WAVE_quality_control", + "long_name": "sea surface wave spectral mean period", + "method": "Spectral analysis method", + "standard_name": "sea_surface_wave_mean_period_from_variance_spectral_density_first_frequency_moment", + "units": "s", + 
"valid_max": 50.0, + "valid_min": 0.0 + }, + "WMDS": { + "type": "double", + "ancillary_variable": "WAVE_quality_control", + "long_name": "spectral sea surface wave mean directional spread", + "method": "Spectral analysis method", + "positive": "clockwise", + "standard_name": "sea_surface_wave_directional_spread", + "units": "Degrees", + "valid_max": 360.0, + "valid_min": 0.0 + }, + "SSWMD": { + "type": "double", + "ancillary_variable": "WAVE_quality_control", + "comment": "Direction (related to the magnetic north) from which the mean period waves are coming from", + "compass_correction_applied": 13, + "long_name": "spectral sea surface wave mean direction", + "magnetic_declination": 12.86, + "method": "Spectral analysis method", + "positive": "clockwise", + "reference_datum": "true north", + "standard_name": "sea_surface_wave_from_direction", + "units": "Degrees", + "valid_max": 360.0, + "valid_min": 0.0 + }, + "water_depth": { + "type": "int64" + }, + "wmo_id": { + "type": "string" + }, + "timestamp": { + "type": "int64" + }, + "polygon": { + "type": "string" + }, + "site_name": { + "type": "string" + }, + "filename": { + "type": "string" + } + }, + "dataset_gattrs": { + "title": "ARDC" + }, + "force_old_pq_del": true, + "aws_opendata_registry": { + "Name": "", + "Description": "", + "Documentation": "", + "Contact": "", + "ManagedBy": "", + "UpdateFrequency": "", + "Tags": [], + "License": "", + "Resources": [ + { + "Description": "", + "ARN": "", + "Region": "", + "Type": "", + "Explore": [] + }, + { + "Description": "", + "ARN": "", + "Region": "", + "Type": "" + }, + { + "Description": "", + "ARN": "", + "Region": "", + "Type": "" + }, + { + "Description": "", + "ARN": "", + "Region": "", + "Type": "" + } + ], + "DataAtWork": { + "Tutorials": [ { - "Description": "", - "ARN": "", - "Region": "", - "Type": "", - "Explore": [] + "Title": "", + "URL": "", + "Services": "", + "AuthorName": "", + "AuthorURL": "" }, { - "Description": "", - "ARN": "", - "Region": 
"", - "Type": "" + "Title": "", + "URL": "", + "AuthorName": "", + "AuthorURL": "" }, { - "Description": "", - "ARN": "", - "Region": "", - "Type": "" + "Title": "", + "URL": "", + "AuthorName": "", + "AuthorURL": "" + } + ], + "Tools & Applications": [ + { + "Title": "", + "URL": "", + "AuthorName": "", + "AuthorURL": "" }, { - "Description": "", - "ARN": "", - "Region": "", - "Type": "" + "Title": "", + "URL": "", + "AuthorName": "", + "AuthorURL": "" } ], - "DataAtWork": { - "Tutorials": [ - { - "Title": "", - "URL": "", - "Services": "", - "AuthorName": "", - "AuthorURL": "" - }, - { - "Title": "", - "URL": "", - "AuthorName": "", - "AuthorURL": "" - }, - { - "Title": "", - "URL": "", - "AuthorName": "", - "AuthorURL": "" - } - ], - "Tools & Applications": [ - { - "Title": "", - "URL": "", - "AuthorName": "", - "AuthorURL": "" - }, - { - "Title": "", - "URL": "", - "AuthorName": "", - "AuthorURL": "" - } - ], - "Publications": [ - { - "Title": "", - "URL": "", - "AuthorName": "" - }, - { - "Title": "", - "URL": "", - "AuthorName": "" - } - ] - } + "Publications": [ + { + "Title": "", + "URL": "", + "AuthorName": "" + }, + { + "Title": "", + "URL": "", + "AuthorName": "" + } + ] } + } } diff --git a/test_aodn_cloud_optimised/resources/argo_core.json b/test_aodn_cloud_optimised/resources/argo_core.json index db6f54f..f03acaf 100644 --- a/test_aodn_cloud_optimised/resources/argo_core.json +++ b/test_aodn_cloud_optimised/resources/argo_core.json @@ -2,6 +2,16 @@ "dataset_name": "argo_core", "logger_name": "argo_core", "cloud_optimised_format": "parquet", + "cluster_options" : { + "n_workers": [8, 20], + "scheduler_vm_types": "t3.small", + "worker_vm_types": "t3.large", + "allow_ingress_from": "me", + "compute_purchase_option": "spot_with_fallback", + "worker_options": { + "nthreads": 8, + "memory_limit": "32GB" } + }, "metadata_uuid": "4402cb50-e20a-44ee-93e6-4728259250d2", "gattrs_to_variables": [], "partition_keys": [ @@ -282,6 +292,17 @@ "long_name": "quality 
flag", "conventions": "Argo reference table 2" }, + "PSAL_ADJUSTED": { + "type": "float", + "long_name": "Practical salinity", + "standard_name": "sea_water_salinity", + "units": "psu", + "valid_min": 2.0, + "valid_max": 41.0, + "C_format": "%9.3f", + "FORTRAN_format": "F9.3", + "resolution": 0.0010000000474974513 + }, "PSAL_ADJUSTED_QC": { "type": "string", "long_name": "quality flag", diff --git a/test_aodn_cloud_optimised/resources/common.json b/test_aodn_cloud_optimised/resources/common.json index 1ffcbd8..79ad4cb 100644 --- a/test_aodn_cloud_optimised/resources/common.json +++ b/test_aodn_cloud_optimised/resources/common.json @@ -1,5 +1,5 @@ { "BUCKET_RAW_DEFAULT": "imos-data", "BUCKET_OPTIMISED_DEFAULT": "imos-data-lab-optimised", - "ROOT_PREFIX_CLOUD_OPTIMISED_PATH": "parquet/loz_test" + "ROOT_PREFIX_CLOUD_OPTIMISED_PATH": "testing" } diff --git a/test_aodn_cloud_optimised/test_ardcwave.py b/test_aodn_cloud_optimised/test_ardcwave.py deleted file mode 100644 index dd72930..0000000 --- a/test_aodn_cloud_optimised/test_ardcwave.py +++ /dev/null @@ -1,168 +0,0 @@ -import os -import shutil -import tempfile -import unittest -from unittest.mock import patch, MagicMock - -import pyarrow as pa -import pandas as pd -from numpy.testing import assert_array_equal - -from aodn_cloud_optimised.lib.GenericParquetHandler import GenericHandler -from aodn_cloud_optimised.lib.config import load_dataset_config - -ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) - -# Specify the filename relative to the current directory -TEST_FILE_NC = os.path.join( - ROOT_DIR, "resources", "BOM_20240301_CAPE-SORELL_RT_WAVE-PARAMETERS_monthly.nc" -) - -CONFIG_JSON = os.path.join(ROOT_DIR, "resources", "ardc_wave_nrt.json") - - -class TestGenericHandler(unittest.TestCase): - @patch( - "aodn_cloud_optimised.lib.GenericParquetHandler.GenericHandler.get_s3_raw_obj" - ) - def setUp(self, mock_get_s3_raw_obj): - # Create a temporary directory - self.tmp_dir = tempfile.mkdtemp() - - # Copy the 
test NetCDF file to the temporary directory - self.tmp_nc_path = os.path.join(self.tmp_dir, os.path.basename(TEST_FILE_NC)) - shutil.copy(TEST_FILE_NC, self.tmp_nc_path) - - dataset_config = load_dataset_config(CONFIG_JSON) - - dataset_config_no_schema = { - "dataset_name": "dummy_table_name", - "cloud_optimised_format": "parquet", - "gattrs_to_variables": ["site_name", "water_depth", "wmo_id"], - "partition_keys": ["site_name", "timestamp"], - "time_extent": {"time": "TIME", "partition_timestamp_period": "M"}, - "spatial_extent": { - "lat": "LATITUDE", - "lon": "LONGITUDE", - "spatial_resolution": 5, - }, - "schema": {}, - "dataset_gattrs": {"title": "ARDC glider"}, - "metadata_uuid": "b12b3-123bb-iijww", - "force_old_pq_del": False, - } - - self.handler = GenericHandler( - raw_bucket_name="dummy_raw_bucket", - optimised_bucket_name="dummy_optimised_bucket", - input_object_key=os.path.basename(self.tmp_nc_path), - dataset_config=dataset_config_no_schema, - force_old_pq_del=False, - ) - - self.handler_with_schema = GenericHandler( - raw_bucket_name="dummy_raw_bucket", - optimised_bucket_name="dummy_optimised_bucket", - input_object_key=os.path.basename(self.tmp_nc_path), - dataset_config=dataset_config, - ) - - # modify the path of the parquet dataset output - self.handler.cloud_optimised_output_path = os.path.join( - self.tmp_dir, "dummy_dataset_name" - ) - self.handler_with_schema.cloud_optimised_output_path = os.path.join( - self.tmp_dir, "dummy_dataset_name" - ) - - # Create a mock object for xr.open_dataset - self.mock_open_dataset = MagicMock() - - @patch( - "aodn_cloud_optimised.lib.GenericParquetHandler.GenericHandler.get_s3_raw_obj" - ) - def test_get_s3_raw_obj(self, mock_get_s3_raw_obj): - with patch("aodn_cloud_optimised.lib.GenericParquetHandler.boto3.client"): - mock_get_s3_raw_obj.return_value = self.tmp_nc_path - tmp_filepath = self.handler.get_s3_raw_obj() - self.assertEqual(tmp_filepath, self.tmp_nc_path) - - @patch( - 
"aodn_cloud_optimised.lib.GenericParquetHandler.GenericHandler.get_s3_raw_obj" - ) - def test_data_to_df_ds(self, mock_get_s3_raw_obj): - # Configure the mock object to return the path of the copied NetCDF file - mock_get_s3_raw_obj.return_value = self.tmp_nc_path - - # Call the preprocess_data method - generator = self.handler.preprocess_data(self.tmp_nc_path) - df, ds = next(generator) - - # Assert that ds.site_code is equal to the expected value - assert_array_equal(ds.site_name, "Cape Sorell") - - def test_add_columns_df_and_bad_timestamps(self): - # Convert the Dataset to DataFrame using preprocess_data - generator = self.handler.preprocess_data(self.tmp_nc_path) - df, ds = next(generator) - - # Call the method to add columns to the DataFrame - result_df = self.handler._add_timestamp_df(df) - result_df = self.handler._add_columns_df(result_df, ds) - - # now we call the next function to remove the bad timestamp values ( which does also a reindexing) - result_df = self.handler._rm_bad_timestamp_df(result_df) - self.assertEqual(result_df["timestamp"][0], 1709251200.0) - - def test_create_data_parquet_with_mocked_parameters_and_with_schema(self): - # Mock the return value of self.get_partition_parameters_data() - # with patch.object(self.handler, 'get_partition_parameters_data', return_value=["site_name"]) as mock_get_params: - # Convert the Dataset to DataFrame using preprocess_data - generator = self.handler_with_schema.preprocess_data(self.tmp_nc_path) - df, ds = next(generator) - - self.handler_with_schema.publish_cloud_optimised(df, ds) - - # Read the Parquet dataset - parquet_file_path = self.handler_with_schema.cloud_optimised_output_path - parquet_dataset = pd.read_parquet(parquet_file_path) - - # Assert the expected values in the Parquet dataset - self.assertNotIn( - "DUMMY_VAR_NOT_IN", parquet_dataset.columns - ) # make sure the variable is removed - self.assertIn("WHTH", parquet_dataset.columns) - - self.assertEqual(parquet_dataset["timestamp"][0], 
1709251200.0) - self.assertEqual( - parquet_dataset["TIME"][0], pd.Timestamp("2024-03-01 01:30:00") - ) - - # Testing the metadata sidecar file - # Reading the metadata file of the dataset (at the root) - parquet_meta_file_path = os.path.join( - self.handler_with_schema.cloud_optimised_output_path, "_common_metadata" - ) - parquet_meta = pa.parquet.read_schema(parquet_meta_file_path) - import json - - # horrible ... but got to be done. The dictionary of metadata has to be a dictionnary with byte keys and byte values. - # meaning that we can't have nested dictionaries ... - decoded_meta = { - key.decode("utf-8"): json.loads(value.decode("utf-8").replace("'", '"')) - for key, value in parquet_meta.metadata.items() - } - - self.assertEqual( - decoded_meta["dataset_metadata"]["metadata_uuid"], - "2807f3aa-4db0-4924-b64b-354ae8c10b58", - ) - self.assertEqual(decoded_meta["dataset_metadata"]["title"], "ARDC") - - def tearDown(self): - # Remove the temporary directory and its contents - shutil.rmtree(self.tmp_dir) - - -if __name__ == "__main__": - unittest.main() diff --git a/test_aodn_cloud_optimised/test_argohandler.py b/test_aodn_cloud_optimised/test_argohandler.py deleted file mode 100644 index e783491..0000000 --- a/test_aodn_cloud_optimised/test_argohandler.py +++ /dev/null @@ -1,184 +0,0 @@ -import os -import shutil -import tempfile -import unittest -from unittest.mock import patch, MagicMock - -import numpy as np -import pandas as pd -from numpy.testing import assert_array_equal - -from aodn_cloud_optimised.lib.ArgoHandler import ArgoHandler -from aodn_cloud_optimised.lib.config import load_dataset_config - -ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) - -# Specify the filename relative to the current directory -TEST_FILE_NC = os.path.join(ROOT_DIR, "resources", "2902093_prof.nc") -TEST_FILE_BAD_GEOM_NC = os.path.join(ROOT_DIR, "resources", "5905017_prof.nc") - -CONFIG_JSON = os.path.join(ROOT_DIR, "resources", "argo_core.json") - - -class 
TestGenericHandler(unittest.TestCase): - @patch("aodn_cloud_optimised.lib.ArgoHandler.ArgoHandler.get_s3_raw_obj") - def setUp(self, mock_get_s3_raw_obj): - # Create a temporary directory - self.tmp_dir = tempfile.mkdtemp() - - # Copy the test NetCDF file to the temporary directory - self.tmp_nc_path = os.path.join(self.tmp_dir, os.path.basename(TEST_FILE_NC)) - shutil.copy(TEST_FILE_NC, self.tmp_nc_path) - - self.tmp_nc_bad_geom_path = os.path.join( - self.tmp_dir, os.path.basename(TEST_FILE_BAD_GEOM_NC) - ) - shutil.copy(TEST_FILE_BAD_GEOM_NC, self.tmp_nc_bad_geom_path) - - dataset_config = load_dataset_config(CONFIG_JSON) - - self.handler = ArgoHandler( - raw_bucket_name="dummy_raw_bucket", - optimised_bucket_name="dummy_optimised_bucket", - input_object_key=os.path.basename(self.tmp_nc_path), - dataset_config=dataset_config, - ) - - self.handler_bad_geom = ArgoHandler( - raw_bucket_name="dummy_raw_bucket", - optimised_bucket_name="dummy_optimised_bucket", - input_object_key=os.path.basename(self.tmp_nc_bad_geom_path), - dataset_config=dataset_config, - ) - # modify the path of the parquet dataset output - self.handler.cloud_optimised_output_path = os.path.join( - self.tmp_dir, "dummy_table_name" - ) - self.handler_bad_geom.cloud_optimised_output_path = os.path.join( - self.tmp_dir, "dummy_table_name" - ) - - # Create a mock object for xr.open_dataset - self.mock_open_dataset = MagicMock() - - # test method inherited from super - @patch("aodn_cloud_optimised.lib.ArgoHandler.ArgoHandler.get_s3_raw_obj") - def test_get_s3_raw_obj(self, mock_get_s3_raw_obj): - with patch("aodn_cloud_optimised.lib.GenericParquetHandler.boto3.client"): - mock_get_s3_raw_obj.return_value = self.tmp_nc_path - tmp_filepath = self.handler.get_s3_raw_obj() - self.assertEqual(tmp_filepath, self.tmp_nc_path) - - @patch("aodn_cloud_optimised.lib.ArgoHandler.ArgoHandler.get_s3_raw_obj") - def test_data_to_df_ds(self, mock_get_s3_raw_obj): - # Configure the mock object to return the path of 
the copied NetCDF file - mock_get_s3_raw_obj.return_value = self.tmp_nc_path - - # Call the preprocess_data method - generator = self.handler.preprocess_data(self.tmp_nc_path) - df, ds = next(generator) - - # Assert that ds.site_code is equal to the expected value - assert_array_equal(np.unique(ds.PLATFORM_NUMBER.values), np.array([2902093])) - - def test_add_columns_df_and_bad_timestamps(self): - # with patch.object(self.handler_no_schema, 'get_partition_parameters_data', return_value=["site_code"]) as mock_get_params: - # Convert the Dataset to DataFrame using preprocess_data - generator = self.handler.preprocess_data(self.tmp_nc_path) - df, ds = next(generator) - - # Call the method to add columns to the DataFrame - result_df = self.handler._add_timestamp_df(df) - result_df = self.handler._add_columns_df(result_df, ds) - - # Check if the column are added with the correct values - self.assertEqual(result_df["filename"][0], os.path.basename(self.tmp_nc_path)) - self.assertEqual( - result_df["timestamp"][0], -9223372036.854776 - ) # This is a NAN value but all good! 
- - # now we call the next function to remove the bad timestamp values ( which does also a reindexing) - result_df = self.handler._rm_bad_timestamp_df(result_df) - self.assertEqual(result_df["timestamp"][0], 1356998400.0) - - @patch("aodn_cloud_optimised.lib.GenericParquetHandler.boto3.client") - def test_create_data_parquet(self, mock_boto3_client): - # Set up mock return values and inputs - mock_s3_client = mock_boto3_client.return_value - # Mock the return value of s3.download_file to simulate file download - mock_s3_client.download_file.return_value = self.tmp_nc_path - - # Call the get_s3_raw_obj method (which should now use the mocked behavior) - self.handler.get_s3_raw_obj() - - self.handler.tmp_input_file = self.tmp_nc_path # overwrite value in handler - - # Mock the return value of self.get_partition_parameters_data() - # with patch.object(self.handler_no_schema, 'get_partition_parameters_data', return_value=["PLATFORM_NUMBER"]) as mock_get_params: - # Convert the Dataset to DataFrame using preprocess_data - generator = self.handler.preprocess_data(self.tmp_nc_path) - df, ds = next(generator) - - self.handler.publish_cloud_optimised(df, ds) - - # Read the Parquet dataset - parquet_file_path = self.handler.cloud_optimised_output_path - parquet_dataset = pd.read_parquet(parquet_file_path) - - # Assert the expected values in the Parquet dataset - self.assertEqual(parquet_dataset["timestamp"][0], 1356998400.0) - self.assertEqual( - parquet_dataset["JULD"][0], pd.Timestamp("2013-02-26 03:15:00") - ) - - # Assert the expected values in the Parquet dataset - self.assertNotIn( - "PSAL_ADJUSTED", parquet_dataset.columns - ) # make sure the variable is removed - self.assertIn("TEMP_ADJUSTED", parquet_dataset.columns) - - @patch("aodn_cloud_optimised.lib.GenericParquetHandler.boto3.client") - def test_create_data_parquet_bad_geom(self, mock_boto3_client): - # Set up mock return values and inputs - mock_s3_client = mock_boto3_client.return_value - # Mock the return 
value of s3.download_file to simulate file download - mock_s3_client.download_file.return_value = self.tmp_nc_bad_geom_path - - # Call the get_s3_raw_obj method (which should now use the mocked behavior) - self.handler_bad_geom.get_s3_raw_obj() - - self.handler_bad_geom.tmp_input_file = ( - self.tmp_nc_bad_geom_path - ) # overwrite value in handler - - # Mock the return value of self.get_partition_parameters_data() - # with patch.object(self.handler_no_schema, 'get_partition_parameters_data', return_value=["PLATFORM_NUMBER"]) as mock_get_params: - # Convert the Dataset to DataFrame using preprocess_data - generator = self.handler_bad_geom.preprocess_data(self.tmp_nc_path) - df, ds = next(generator) - - self.handler_bad_geom.publish_cloud_optimised(df, ds) - - # Read the Parquet dataset - parquet_file_path = self.handler_bad_geom.cloud_optimised_output_path - parquet_dataset = pd.read_parquet(parquet_file_path) - - # Assert the expected values in the Parquet dataset - self.assertEqual(parquet_dataset["timestamp"][0], 1356998400.0) - self.assertEqual( - parquet_dataset["JULD"][0], pd.Timestamp("2013-02-26 03:15:00") - ) - - # Assert the expected values in the Parquet dataset - self.assertNotIn( - "PSAL_ADJUSTED", parquet_dataset.columns - ) # make sure the variable is removed - self.assertIn("TEMP_ADJUSTED", parquet_dataset.columns) - - def tearDown(self): - # Remove the temporary directory and its contents - shutil.rmtree(self.tmp_dir) - - -if __name__ == "__main__": - unittest.main() diff --git a/test_aodn_cloud_optimised/test_config.py b/test_aodn_cloud_optimised/test_config.py index 8ad53e4..b67d9b3 100644 --- a/test_aodn_cloud_optimised/test_config.py +++ b/test_aodn_cloud_optimised/test_config.py @@ -7,6 +7,13 @@ from aodn_cloud_optimised.lib.config import ( load_variable_from_file, load_variable_from_config, + load_dataset_config, +) + +ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) + +DATASET_CONFIG_NC_ACORN_JSON = os.path.join( + ROOT_DIR, 
"resources", "acorn_gridded_qc_turq.json" ) @@ -52,6 +59,13 @@ def test_load_variable_from_file_file_not_found(self): os.path.join(self.temp_dir, "non_existent_file.json"), "var1" ) + def test_load_parent_child_config(self): + dataset_acorn_netcdf_config = load_dataset_config(DATASET_CONFIG_NC_ACORN_JSON) + cloud_optimised_format = dataset_acorn_netcdf_config["cloud_optimised_format"] + self.assertEqual( + "zarr", cloud_optimised_format + ) # attribute only found in parent record + if __name__ == "__main__": unittest.main() diff --git a/test_aodn_cloud_optimised/test_generic_parquet_handler.py b/test_aodn_cloud_optimised/test_generic_parquet_handler.py new file mode 100644 index 0000000..8f67795 --- /dev/null +++ b/test_aodn_cloud_optimised/test_generic_parquet_handler.py @@ -0,0 +1,344 @@ +import json +import os +import unittest + +import boto3 +import pandas as pd +import pyarrow as pa +import s3fs +from moto import mock_aws +from moto.moto_server.threaded_moto_server import ThreadedMotoServer +from shapely import wkb +from shapely.geometry import Polygon + +from aodn_cloud_optimised.lib.GenericParquetHandler import GenericHandler +from aodn_cloud_optimised.lib.config import load_dataset_config +from aodn_cloud_optimised.lib.s3Tools import s3_ls +from unittest.mock import patch + +ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) + +# Specify the filename relative to the current directory +TEST_FILE_NC_ANMN = os.path.join( + ROOT_DIR, + "resources", + "IMOS_ANMN-NSW_CDSTZ_20210429T015500Z_SYD140_FV01_SYD140-2104-SBE37SM-RS232-128_END-20210812T011500Z_C-20210827T074819Z.nc", +) + +TEST_FILE_NC_ARDC = os.path.join( + ROOT_DIR, "resources", "BOM_20240301_CAPE-SORELL_RT_WAVE-PARAMETERS_monthly.nc" +) + +DUMMY_FILE = os.path.join(ROOT_DIR, "resources", "DUMMY.nan") +DUMMY_NC_FILE = os.path.join(ROOT_DIR, "resources", "DUMMY.nc") +TEST_CSV_FILE = os.path.join( + ROOT_DIR, "resources", "A69-1105-135_107799906_130722039.csv" +) +DATASET_CONFIG_CSV_AATAMS_JSON = 
os.path.join( + ROOT_DIR, "resources", "aatams_acoustic_tagging.json" +) + +DATASET_CONFIG_NC_ANMN_JSON = os.path.join( + ROOT_DIR, "resources", "anmn_ctd_ts_fv01.json" +) + +DATASET_CONFIG_NC_ARDC_JSON = os.path.join(ROOT_DIR, "resources", "ardc_wave_nrt.json") + + +@mock_aws +class TestGenericHandler(unittest.TestCase): + def setUp(self): + + # Create a mock S3 service + self.BUCKET_OPTIMISED_NAME = "imos-data-lab-optimised" + self.ROOT_PREFIX_CLOUD_OPTIMISED_PATH = "testing" + self.s3 = boto3.client("s3", region_name="us-east-1") + self.s3.create_bucket(Bucket="imos-data") + self.s3.create_bucket(Bucket=self.BUCKET_OPTIMISED_NAME) + + # create moto server; needed for s3fs and parquet + self.server = ThreadedMotoServer(ip_address="127.0.0.1", port=5555) + + self.s3_fs = s3fs.S3FileSystem( + anon=False, + client_kwargs={ + "endpoint_url": "http://127.0.0.1:5555/", + "region_name": "us-east-1", + }, + ) + + self.server.start() + + # Make the "imos-data" bucket public + public_policy_imos_data = { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": "*", + "Action": "s3:GetObject", + "Resource": "arn:aws:s3:::imos-data/*", + } + ], + } + + public_policy_cloud_optimised_data = { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": "*", + "Action": "s3:GetObject", + "Resource": f"arn:aws:s3:::{self.BUCKET_OPTIMISED_NAME}/*", + } + ], + } + + self.s3.put_bucket_policy( + Bucket="imos-data", Policy=json.dumps(public_policy_imos_data) + ) + + self.s3.put_bucket_policy( + Bucket=self.BUCKET_OPTIMISED_NAME, + Policy=json.dumps(public_policy_cloud_optimised_data), + ) + + # Copy files to the mock S3 bucket + self.s3.put_object( + Bucket=self.BUCKET_OPTIMISED_NAME, Key="testing", Body="" + ) # empty file + self._upload_to_s3( + "imos-data", + f"good_nc_anmn/{os.path.basename(TEST_FILE_NC_ANMN)}", + TEST_FILE_NC_ANMN, + ) + self._upload_to_s3( + "imos-data", f"dummy/{os.path.basename(DUMMY_FILE)}", DUMMY_FILE + ) + 
self._upload_to_s3( + "imos-data", f"dummy_nc/{os.path.basename(DUMMY_NC_FILE)}", DUMMY_NC_FILE + ) + self._upload_to_s3( + "imos-data", f"good_csv/{os.path.basename(TEST_CSV_FILE)}", TEST_CSV_FILE + ) + self._upload_to_s3( + "imos-data", + f"good_nc_ardc/{os.path.basename(TEST_FILE_NC_ARDC)}", + TEST_FILE_NC_ARDC, + ) + + dataset_anmn_netcdf_config = load_dataset_config(DATASET_CONFIG_NC_ANMN_JSON) + self.handler_nc_anmn_file = GenericHandler( + optimised_bucket_name=self.BUCKET_OPTIMISED_NAME, + root_prefix_cloud_optimised_path=self.ROOT_PREFIX_CLOUD_OPTIMISED_PATH, + dataset_config=dataset_anmn_netcdf_config, + clear_existing_data=True, + force_previous_parquet_deletion=True, + cluster_mode="local", + ) + + dataset_ardc_netcdf_config = load_dataset_config(DATASET_CONFIG_NC_ARDC_JSON) + self.handler_nc_ardc_file = GenericHandler( + optimised_bucket_name=self.BUCKET_OPTIMISED_NAME, + root_prefix_cloud_optimised_path=self.ROOT_PREFIX_CLOUD_OPTIMISED_PATH, + dataset_config=dataset_ardc_netcdf_config, + clear_existing_data=True, + force_previous_parquet_deletion=True, + cluster_mode="local", + ) + + dataset_aatams_csv_config = load_dataset_config(DATASET_CONFIG_CSV_AATAMS_JSON) + self.handler_csv_file = GenericHandler( + optimised_bucket_name=self.BUCKET_OPTIMISED_NAME, + root_prefix_cloud_optimised_path=self.ROOT_PREFIX_CLOUD_OPTIMISED_PATH, + dataset_config=dataset_aatams_csv_config, + clear_existing_data=True, + cluster_mode="local", + ) + + def _upload_to_s3(self, bucket_name, key, file_path): + with open(file_path, "rb") as f: + self.s3.upload_fileobj(f, bucket_name, key) + + def tearDown(self): + self.server.stop() + + def test_parquet_nc_anmn_handler(self): + nc_obj_ls = s3_ls("imos-data", "good_nc_anmn") + + # 1st pass + with patch.object(self.handler_nc_anmn_file, "s3_fs", new=self.s3_fs): + self.handler_nc_anmn_file.to_cloud_optimised([nc_obj_ls[0]]) + + # 2nd pass, process the same file a second time. 
Should be deleted + # TODO: Not a big big deal breaker, but got an issue which should be fixed in the try except only for the unittest + # 2024-07-01 16:04:54,721 - INFO - GenericParquetHandler.py:824 - delete_existing_matching_parquet - No files to delete: GetFileInfo() yielded path 'imos-data-lab-optimised/testing/anmn_ctd_ts_fv01.parquet/site_code=SYD140/timestamp=1625097600/polygon=01030000000100000005000000000000000020624000000000008041C0000000000060634000000000008041C0000000000060634000000000000039C0000000000020624000000000000039C0000000000020624000000000008041C0/IMOS_ANMN-NSW_CDSTZ_20210429T015500Z_SYD140_FV01_SYD140-2104-SBE37SM-RS232-128_END-20210812T011500Z_C-20210827T074819Z.nc-0.parquet', which is outside base dir 's3://imos-data-lab-optimised/testing/anmn_ctd_ts_fv01.parquet/' + with patch.object(self.handler_nc_anmn_file, "s3_fs", new=self.s3_fs): + self.handler_nc_anmn_file.to_cloud_optimised_single(nc_obj_ls[0]) + + # read parquet + dataset_config = load_dataset_config(DATASET_CONFIG_NC_ANMN_JSON) + dataset_name = dataset_config["dataset_name"] + dname = f"s3://{self.BUCKET_OPTIMISED_NAME}/{self.ROOT_PREFIX_CLOUD_OPTIMISED_PATH}/{dataset_name}.parquet/" + + parquet_dataset = pd.read_parquet( + dname, + engine="pyarrow", + storage_options={ + "client_kwargs": {"endpoint_url": "http://127.0.0.1:5555"} + }, + ) + + self.assertNotIn("station_name", parquet_dataset.columns) + self.assertAlmostEqual(parquet_dataset["TEMP"][0], 13.2773, delta=1e-2) + + # Check if the column are added with the correct values + self.assertIn("site_code", parquet_dataset.columns) + self.assertEqual(parquet_dataset["site_code"].iloc[0], "SYD140") + + self.assertEqual( + parquet_dataset["filename"].iloc[0], os.path.basename(nc_obj_ls[0]) + ) + self.assertEqual(parquet_dataset["timestamp"].iloc[0], 1617235200.0) + + # The following section shows how the created polygon variable can be used to perform data queries. 
this adds significant overload, but is worth it + parquet_dataset["converted_polygon"] = parquet_dataset["polygon"].apply( + lambda x: wkb.loads(bytes.fromhex(x)) + ) + + # Define the predefined polygon + predefined_polygon_coords_out = [(150, -40), (155, -40), (155, -45), (150, -45)] + predefined_polygon_coords_in = [(150, -32), (155, -32), (155, -45), (150, -45)] + + predefined_polygon_out = Polygon(predefined_polygon_coords_out) + predefined_polygon_in = Polygon(predefined_polygon_coords_in) + + df_unique_polygon = parquet_dataset["converted_polygon"].unique()[0] + self.assertFalse(df_unique_polygon.intersects(predefined_polygon_out)) + self.assertTrue(df_unique_polygon.intersects(predefined_polygon_in)) + + # Testing the metadata sidecar file + # Reading the metadata file of the dataset (at the root) + parquet_meta_file_path = os.path.join( + self.handler_nc_anmn_file.cloud_optimised_output_path, "_common_metadata" + ) + parquet_meta = pa.parquet.read_schema( + parquet_meta_file_path, filesystem=self.s3_fs + ) + + # horrible ... but got to be done. The dictionary of metadata has to be a dictionnary with byte keys and byte values. + # meaning that we can't have nested dictionaries ... 
+ decoded_meta = { + key.decode("utf-8"): json.loads(value.decode("utf-8").replace("'", '"')) + for key, value in parquet_meta.metadata.items() + } + + self.assertEqual(decoded_meta["LONGITUDE"]["axis"], "X") + self.assertEqual(decoded_meta["NOMINAL_DEPTH"]["standard_name"], "depth") + + # alternative way to access the metadata + # Create a dictionary where keys are the names and values are the elements + schema_dict = {obj.name: obj for obj in parquet_meta} + self.assertEqual( + schema_dict["TEMP"].metadata.get(b"standard_name"), b"sea_water_temperature" + ) + # other way to access the metadata + schema_dict = {obj.name: obj.metadata for obj in parquet_meta} + self.assertEqual( + schema_dict["TEMP"][b"standard_name"], b"sea_water_temperature" + ) + + def test_parquet_nc_generic_handler(self): + nc_obj_ls = s3_ls("imos-data", "good_nc_ardc") + + # 1st pass + with patch.object(self.handler_nc_ardc_file, "s3_fs", new=self.s3_fs): + self.handler_nc_ardc_file.to_cloud_optimised([nc_obj_ls[0]]) + + # 2nd pass, process the same file a second time. 
Should be deleted + # TODO: Not a big big deal breaker, but got an issue which should be fixed in the try except only for the unittest + # 2024-07-01 16:04:54,721 - INFO - GenericParquetHandler.py:824 - delete_existing_matching_parquet - No files to delete: GetFileInfo() yielded path 'imos-data-lab-optimised/testing/anmn_ctd_ts_fv01.parquet/site_code=SYD140/timestamp=1625097600/polygon=01030000000100000005000000000000000020624000000000008041C0000000000060634000000000008041C0000000000060634000000000000039C0000000000020624000000000000039C0000000000020624000000000008041C0/IMOS_ANMN-NSW_CDSTZ_20210429T015500Z_SYD140_FV01_SYD140-2104-SBE37SM-RS232-128_END-20210812T011500Z_C-20210827T074819Z.nc-0.parquet', which is outside base dir 's3://imos-data-lab-optimised/testing/anmn_ctd_ts_fv01.parquet/' + with patch.object(self.handler_nc_ardc_file, "s3_fs", new=self.s3_fs): + self.handler_nc_ardc_file.to_cloud_optimised_single(nc_obj_ls[0]) + + # read parquet + dataset_config = load_dataset_config(DATASET_CONFIG_NC_ARDC_JSON) + dataset_name = dataset_config["dataset_name"] + dname = f"s3://{self.BUCKET_OPTIMISED_NAME}/{self.ROOT_PREFIX_CLOUD_OPTIMISED_PATH}/{dataset_name}.parquet/" + + parquet_dataset = pd.read_parquet( + dname, + engine="pyarrow", + storage_options={ + "client_kwargs": {"endpoint_url": "http://127.0.0.1:5555"} + }, + ) + + self.assertEqual(parquet_dataset["timestamp"][0], 1709251200.0) + + self.assertNotIn( + "DUMMY_VAR_NOT_IN", parquet_dataset.columns + ) # make sure the variable is removed + self.assertNotIn( + "WHTH", parquet_dataset.columns + ) # removed on purpose to trigger "missing variable from provided pyarrow_schema config, please add to dataset config" + self.assertIn("WPMH", parquet_dataset.columns) + + self.assertEqual(parquet_dataset["timestamp"][0], 1709251200.0) + self.assertEqual( + parquet_dataset["TIME"][0], pd.Timestamp("2024-03-01 01:30:00") + ) + + parquet_meta_file_path = os.path.join( + 
self.handler_nc_ardc_file.cloud_optimised_output_path, "_common_metadata" + ) + parquet_meta = pa.parquet.read_schema( + parquet_meta_file_path, filesystem=self.s3_fs + ) + + # horrible ... but got to be done. The dictionary of metadata has to be a dictionnary with byte keys and byte values. + # meaning that we can't have nested dictionaries ... + decoded_meta = { + key.decode("utf-8"): json.loads(value.decode("utf-8").replace("'", '"')) + for key, value in parquet_meta.metadata.items() + } + + self.assertEqual( + decoded_meta["dataset_metadata"]["metadata_uuid"], + "2807f3aa-4db0-4924-b64b-354ae8c10b58", + ) + self.assertEqual(decoded_meta["dataset_metadata"]["title"], "ARDC") + + def test_parquet_csv_generic_handler(self): # , MockS3FileSystem): + csv_obj_ls = s3_ls("imos-data", "good_csv", suffix=".csv") + # with patch('s3fs.S3FileSystem', lambda anon, client_kwargs: s3fs.S3FileSystem(anon=False, client_kwargs={"endpoint_url": "http://127.0.0.1:5555/"})): + # MockS3FileSystem.return_value = s3fs.S3FileSystem(anon=False, client_kwargs={"endpoint_url": "http://127.0.0.1:5555"}) + + # with mock_aws(aws_credentials): + # 1st pass, could have some errors distributed.worker - ERROR - Failed to communicate with scheduler during heartbeat. + # Solution is the rerun the unittest + with patch.object(self.handler_csv_file, "s3_fs", new=self.s3_fs): + self.handler_csv_file.to_cloud_optimised([csv_obj_ls[0]]) + + # 2nd pass + with patch.object(self.handler_csv_file, "s3_fs", new=self.s3_fs): + self.handler_csv_file.to_cloud_optimised_single(csv_obj_ls[0]) + + # Read parquet dataset and check data is good! 
+ dataset_config = load_dataset_config(DATASET_CONFIG_CSV_AATAMS_JSON) + dataset_name = dataset_config["dataset_name"] + dname = f"s3://{self.BUCKET_OPTIMISED_NAME}/{self.ROOT_PREFIX_CLOUD_OPTIMISED_PATH}/{dataset_name}.parquet/" + + parquet_dataset = pd.read_parquet( + dname, + engine="pyarrow", + storage_options={ + "client_kwargs": {"endpoint_url": "http://127.0.0.1:5555"} + }, + ) + + self.assertIn("station_name", parquet_dataset.columns) + + +if __name__ == "__main__": + unittest.main() diff --git a/test_aodn_cloud_optimised/test_generic_zarr_handler.py b/test_aodn_cloud_optimised/test_generic_zarr_handler.py new file mode 100644 index 0000000..46f75d6 --- /dev/null +++ b/test_aodn_cloud_optimised/test_generic_zarr_handler.py @@ -0,0 +1,180 @@ +import json +import os +import unittest +from unittest.mock import patch + +import boto3 +import numpy as np +import pytest +import s3fs +import xarray as xr +from moto import mock_aws +from moto.moto_server.threaded_moto_server import ThreadedMotoServer + +from aodn_cloud_optimised.lib.GenericZarrHandler import GenericHandler +from aodn_cloud_optimised.lib.config import load_dataset_config +from aodn_cloud_optimised.lib.s3Tools import s3_ls + +ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) + +# Specify the filename relative to the current directory +filenames = [ + "IMOS_ACORN_V_20240101T000000Z_TURQ_FV01_1-hour-avg.nc", + "IMOS_ACORN_V_20240101T010000Z_TURQ_FV01_1-hour-avg.nc", + "IMOS_ACORN_V_20240101T020000Z_TURQ_FV01_1-hour-avg.nc", + "IMOS_ACORN_V_20240101T030000Z_TURQ_FV01_1-hour-avg.nc", +] + +TEST_FILE_NC_ACORN = [ + os.path.join(ROOT_DIR, "resources", file_name) for file_name in filenames +] + +DATASET_CONFIG_NC_ACORN_JSON = os.path.join( + ROOT_DIR, "resources", "acorn_gridded_qc_turq.json" +) + + +@pytest.fixture(scope="function") +def mock_aws_server(): + with mock_aws(): + yield + + +@mock_aws +class TestGenericZarrHandler(unittest.TestCase): + def setUp(self): + # TODO: remove this abomination for 
unittesting. but it works. Only for zarr ! + os.environ["RUNNING_UNDER_UNITTEST"] = "true" + + # Create a mock S3 service + self.BUCKET_OPTIMISED_NAME = "imos-data-lab-optimised" + self.ROOT_PREFIX_CLOUD_OPTIMISED_PATH = "testing" + self.s3 = boto3.client("s3", region_name="us-east-1") + self.s3.create_bucket(Bucket="imos-data") + self.s3.create_bucket(Bucket=self.BUCKET_OPTIMISED_NAME) + + # create moto server; needed for s3fs and parquet + self.server = ThreadedMotoServer(ip_address="127.0.0.1", port=5555) + + self.s3_fs = s3fs.S3FileSystem( + anon=False, + client_kwargs={ + "endpoint_url": "http://127.0.0.1:5555/", + "region_name": "us-east-1", + }, + ) + + self.server.start() + + # Make the "imos-data" bucket public + public_policy_imos_data = { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": "*", + "Action": "s3:GetObject", + "Resource": "arn:aws:s3:::imos-data/*", + } + ], + } + + public_policy_cloud_optimised_data = { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": "*", + "Action": "s3:GetObject", + "Resource": f"arn:aws:s3:::{self.BUCKET_OPTIMISED_NAME}/*", + } + ], + } + + self.s3.put_bucket_policy( + Bucket="imos-data", Policy=json.dumps(public_policy_imos_data) + ) + + self.s3.put_bucket_policy( + Bucket=self.BUCKET_OPTIMISED_NAME, + Policy=json.dumps(public_policy_cloud_optimised_data), + ) + + # Copy files to the mock S3 bucket + + for test_file in TEST_FILE_NC_ACORN: + self._upload_to_s3( + "imos-data", f"acorn/{os.path.basename(test_file)}", test_file + ) + + dataset_acorn_netcdf_config = load_dataset_config(DATASET_CONFIG_NC_ACORN_JSON) + self.handler_nc_acorn_file = GenericHandler( + optimised_bucket_name=self.BUCKET_OPTIMISED_NAME, + root_prefix_cloud_optimised_path=self.ROOT_PREFIX_CLOUD_OPTIMISED_PATH, + dataset_config=dataset_acorn_netcdf_config, + # clear_existing_data=True, + cluster_mode="local", + ) + + def _upload_to_s3(self, bucket_name, key, file_path): + with 
open(file_path, "rb") as f: + self.s3.upload_fileobj(f, bucket_name, key) + + def tearDown(self): + self.server.stop() + del os.environ["RUNNING_UNDER_UNITTEST"] + + # TODO: find a solution to patch s3fs properly and not relying on changing the s3fs values in the code + def test_zarr_nc_acorn_handler(self): + nc_obj_ls = s3_ls("imos-data", "acorn") + + # 1st pass + # 2024-07-02 11:16:16,538 - INFO - GenericZarrHandler.py:381 - publish_cloud_optimised_fileset_batch - Writing data to new Zarr dataset + # 2024-07-02 11:16:19,366 - INFO - GenericZarrHandler.py:391 - publish_cloud_optimised_fileset_batch - Batch 1 processed and written to + + with patch.object(self.handler_nc_acorn_file, "s3_fs", new=self.s3_fs): + self.handler_nc_acorn_file.to_cloud_optimised(nc_obj_ls) + + # 2nd pass, process the same file a second time. Should be overwritten in ONE region slice + # 2024-07-02 11:16:21,649 - INFO - GenericZarrHandler.py:303 - publish_cloud_optimised_fileset_batch - Duplicate values of TIME + # 2024-07-02 11:16:21,650 - INFO - GenericZarrHandler.py:353 - publish_cloud_optimised_fileset_batch - Overwriting Zarr dataset in Region: {'TIME': slice(0, 4, None)}, Matching Indexes in new ds: [0 1 2 3] + # 2024-07-02 11:16:22,573 - INFO - GenericZarrHandler.py:391 - publish_cloud_optimised_fileset_batch - Batch 1 processed and written to + with patch.object(self.handler_nc_acorn_file, "s3_fs", new=self.s3_fs): + self.handler_nc_acorn_file.to_cloud_optimised(nc_obj_ls) + + # 3rd pass, create a non-contiguous list of files to reprocess. TWO region slices should happen. Look in the log + # output of the unittest as it's hard to test! 
+ # output should be + # 2024-07-02 11:16:24,774 - INFO - GenericZarrHandler.py:276 - publish_cloud_optimised_fileset_batch - append data to existing Zarr + # 2024-07-02 11:16:24,837 - INFO - GenericZarrHandler.py:303 - publish_cloud_optimised_fileset_batch - Duplicate values of TIME + # 2024-07-02 11:16:24,839 - INFO - GenericZarrHandler.py:353 - publish_cloud_optimised_fileset_batch - Overwriting Zarr dataset in Region: {'TIME': slice(0, 1, None)}, Matching Indexes in new ds: [0] + # 2024-07-02 11:16:25,905 - INFO - GenericZarrHandler.py:353 - publish_cloud_optimised_fileset_batch - Overwriting Zarr dataset in Region: {'TIME': slice(2, 4, None)}, Matching Indexes in new ds: [1 2] + # 2024-07-02 11:16:26,631 - INFO - GenericZarrHandler.py:391 - publish_cloud_optimised_fileset_batch - Batch 1 processed and written to + nc_obj_ls_non_contiguous = nc_obj_ls[0:1] + nc_obj_ls[2:4] + with patch.object(self.handler_nc_acorn_file, "s3_fs", new=self.s3_fs): + self.handler_nc_acorn_file.to_cloud_optimised(nc_obj_ls_non_contiguous) + + # read zarr + dataset_config = load_dataset_config(DATASET_CONFIG_NC_ACORN_JSON) + dataset_name = dataset_config["dataset_name"] + dname = f"s3://{self.BUCKET_OPTIMISED_NAME}/{self.ROOT_PREFIX_CLOUD_OPTIMISED_PATH}/{dataset_name}.zarr/" + + # TODO: calling open_zarr in the unitest is crazy finiky. Sometimes it works sometimes it doesnt + # ValueError: The future belongs to a different loop than the one specified as the loop argument + # the only way is to run it multiple times. Could be a local machine issue + # Also debugging and trying to load open_zarr in debug doesnt work... 
However it's possible to do a + # print(np.nanmax(ds.UCUR.values)) to get value to write unittests + + ds = xr.open_zarr(self.s3_fs.get_mapper(dname), consolidated=True) + self.assertEqual(ds.UCUR.standard_name, "eastward_sea_water_velocity") + + np.testing.assert_almost_equal( + np.nanmax(ds.UCUR.values), + 0.69455004, + decimal=3, + err_msg="Maximum value in UCUR is not as expected.", + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/test_aodn_cloud_optimised/test_genericparquethandler.py b/test_aodn_cloud_optimised/test_genericparquethandler.py deleted file mode 100644 index 7a66d11..0000000 --- a/test_aodn_cloud_optimised/test_genericparquethandler.py +++ /dev/null @@ -1,289 +0,0 @@ -import json -import os -import shutil -import tempfile -import unittest -from unittest.mock import patch - -import pandas as pd -import pyarrow as pa - -from shapely.geometry import Polygon -from shapely import wkb - -from aodn_cloud_optimised.lib.GenericParquetHandler import GenericHandler -from aodn_cloud_optimised.lib.config import load_dataset_config - -ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) - -# Specify the filename relative to the current directory -TEST_FILE_NC = os.path.join( - ROOT_DIR, - "resources", - "IMOS_ANMN-NSW_CDSTZ_20210429T015500Z_SYD140_FV01_SYD140-2104-SBE37SM-RS232-128_END-20210812T011500Z_C-20210827T074819Z.nc", -) -DUMMY_FILE = os.path.join(ROOT_DIR, "resources", "DUMMY.nan") -DUMMY_NC_FILE = os.path.join(ROOT_DIR, "resources", "DUMMY.nc") -TEST_CSV_FILE = os.path.join( - ROOT_DIR, "resources", "A69-1105-135_107799906_130722039.csv" -) -CONFIG_CSV_JSON = os.path.join(ROOT_DIR, "resources", "aatams_acoustic_tagging.json") - - -class TestGenericHandler(unittest.TestCase): - @patch( - "aodn_cloud_optimised.lib.GenericParquetHandler.GenericHandler.get_s3_raw_obj" - ) - def setUp(self, mock_get_s3_raw_obj): - # Create a temporary directory - self.tmp_dir = tempfile.mkdtemp() - - # Copy the test NetCDF file to the temporary 
directory - self.tmp_nc_path = os.path.join(self.tmp_dir, os.path.basename(TEST_FILE_NC)) - shutil.copy(TEST_FILE_NC, self.tmp_nc_path) - - self.tmp_dummy_file_path = os.path.join( - self.tmp_dir, os.path.basename(DUMMY_FILE) - ) - shutil.copy(DUMMY_FILE, self.tmp_dummy_file_path) - - self.tmp_dummy_nc_path = os.path.join( - self.tmp_dir, os.path.basename(DUMMY_NC_FILE) - ) - shutil.copy(DUMMY_NC_FILE, self.tmp_dummy_nc_path) - - dataset_config = { - "dataset_name": "dummy", - "cloud_optimised_format": "parquet", - "metadata_uuid": "a681fdba-c6d9-44ab-90b9-113b0ed03536", - "gattrs_to_variables": ["site_code"], - "partition_keys": ["site_code", "timestamp", "polygon"], - "time_extent": {"time": "TIME", "partition_timestamp_period": "Q"}, - "spatial_extent": { - "lat": "LATITUDE", - "lon": "LONGITUDE", - "spatial_resolution": 5, - }, - "schema": { - "TIMESERIES": {"type": "int32"}, - "LATITUDE": {"type": "double"}, - "LONGITUDE": {"type": "double", "axis": "X"}, - "NOMINAL_DEPTH": {"type": "float", "standard_name": "depth"}, - "TEMP": {"type": "float", "standard_name": "sea_water_temperature"}, - "DUMMY1": {"type": "float"}, - "timestamp": {"type": "int64"}, - "site_code": {"type": "string"}, - "filename": {"type": "string"}, - "polygon": {"type": "string"}, - "TIME": {"type": "timestamp[ns]"}, - }, - "dataset_gattrs": {"title": "dummy"}, - "force_old_pq_del": True, - } - - self.handler = GenericHandler( - raw_bucket_name="dummy_raw_bucket", - optimised_bucket_name="dummy_optimised_bucket", - input_object_key=os.path.basename(self.tmp_nc_path), - dataset_config=dataset_config, - ) - - self.handler_dummy_file = GenericHandler( - raw_bucket_name="dummy_raw_bucket", - optimised_bucket_name="dummy_optimised_bucket", - input_object_key=os.path.basename(self.tmp_dummy_file_path), - dataset_config=dataset_config, - ) - - self.handler_dummy_nc_file = GenericHandler( - raw_bucket_name="dummy_raw_bucket", - optimised_bucket_name="dummy_optimised_bucket", - 
input_object_key=os.path.basename(self.tmp_dummy_nc_path), - dataset_config=dataset_config, - ) - - # modify the path of the parquet dataset output - self.handler.cloud_optimised_output_path = os.path.join( - self.tmp_dir, "dummy_dataset_name" - ) - - self.tmp_csv_path = os.path.join(self.tmp_dir, os.path.basename(TEST_CSV_FILE)) - shutil.copy(TEST_CSV_FILE, self.tmp_csv_path) - dataset_csv_config = load_dataset_config(CONFIG_CSV_JSON) - self.handler_csv_file = GenericHandler( - raw_bucket_name="dummy_raw_bucket", - optimised_bucket_name="dummy_optimised_bucket", - input_object_key=os.path.basename(self.tmp_csv_path), - dataset_config=dataset_csv_config, - ) - self.handler_csv_file.cloud_optimised_output_path = os.path.join( - self.tmp_dir, "dummy_dataset_name" - ) - - # Create a mock object for xr.open_dataset - # self.mock_open_dataset = MagicMock() - - @patch( - "aodn_cloud_optimised.lib.GenericParquetHandler.GenericHandler.get_s3_raw_obj" - ) - def test_get_s3_raw_obj(self, mock_get_s3_raw_obj): - with patch("aodn_cloud_optimised.lib.GenericParquetHandler.boto3.client"): - mock_get_s3_raw_obj.return_value = self.tmp_nc_path - tmp_filepath = self.handler.get_s3_raw_obj() - self.assertEqual(tmp_filepath, self.tmp_nc_path) - - @patch( - "aodn_cloud_optimised.lib.GenericParquetHandler.GenericHandler.get_s3_raw_obj" - ) - def test_data_to_df_ds(self, mock_get_s3_raw_obj): - # Configure the mock object to return the path of the copied NetCDF file - mock_get_s3_raw_obj.return_value = self.tmp_nc_path - - # Call the preprocess_data method - generator = self.handler.preprocess_data(self.tmp_nc_path) - df, ds = next(generator) - - # Assert that ds.site_code is equal to the expected value - self.assertEqual(ds.site_code, "SYD140") - - def test_add_columns_df(self): - # Convert the Dataset to DataFrame using preprocess_data - generator = self.handler.preprocess_data(self.tmp_nc_path) - df, ds = next(generator) - - # Call the method to add columns such as timestamp, 
sitecode ... to the DataFrame - result_df = self.handler._add_timestamp_df(df) - result_df = self.handler._add_columns_df(result_df, ds) - - # Check if the column are added with the correct values - self.assertIn("site_code", result_df.columns) - self.assertEqual(result_df["site_code"].iloc[0], "SYD140") - self.assertEqual( - result_df["filename"].iloc[0], os.path.basename(self.tmp_nc_path) - ) - self.assertEqual(result_df["timestamp"].iloc[0], 1617235200.0) - - @patch("aodn_cloud_optimised.lib.GenericParquetHandler.boto3.client") - def test_create_data_parquet(self, mock_boto3_client): - # Set up mock return values and inputs - mock_s3_client = mock_boto3_client.return_value - # Mock the return value of s3.download_file to simulate file download - mock_s3_client.download_file.return_value = self.tmp_nc_path - - # Call the get_s3_raw_obj method (which should now use the mocked behavior) - self.handler.get_s3_raw_obj() - - self.handler.tmp_input_file = self.tmp_nc_path # overwrite value in handler - - # Convert the Dataset to DataFrame using preprocess_data - generator = self.handler.preprocess_data(self.tmp_nc_path) - df, ds = next(generator) - - self.handler.publish_cloud_optimised(df, ds) - - # Read the Parquet dataset - parquet_file_path = self.handler.cloud_optimised_output_path - parquet_dataset = pd.read_parquet(parquet_file_path) - - # The following section shows how the created polygon variable can be used to perform data queries. 
this adds significant overload, but is worth it - df["converted_polygon"] = df["polygon"].apply( - lambda x: wkb.loads(bytes.fromhex(x)) - ) - - # Define the predefined polygon - predefined_polygon_coords_out = [(150, -40), (155, -40), (155, -45), (150, -45)] - predefined_polygon_coords_in = [(150, -32), (155, -32), (155, -45), (150, -45)] - - predefined_polygon_out = Polygon(predefined_polygon_coords_out) - predefined_polygon_in = Polygon(predefined_polygon_coords_in) - - df_unique_polygon = df["converted_polygon"].unique()[0] - self.assertFalse(df_unique_polygon.intersects(predefined_polygon_out)) - self.assertTrue(df_unique_polygon.intersects(predefined_polygon_in)) - - # Assert the expected values in the Parquet dataset - self.assertNotIn( - "TEMP_quality_control", parquet_dataset.columns - ) # make sure the variable is removed - self.assertIn("TEMP", parquet_dataset.columns) - - # Testing the metadata sidecar file - # Reading the metadata file of the dataset (at the root) - parquet_meta_file_path = os.path.join( - self.handler.cloud_optimised_output_path, "_common_metadata" - ) - parquet_meta = pa.parquet.read_schema(parquet_meta_file_path) - - # horrible ... but got to be done. The dictionary of metadata has to be a dictionnary with byte keys and byte values. - # meaning that we can't have nested dictionaries ... 
- decoded_meta = { - key.decode("utf-8"): json.loads(value.decode("utf-8").replace("'", '"')) - for key, value in parquet_meta.metadata.items() - } - - self.assertEqual(decoded_meta["LONGITUDE"]["axis"], "X") - self.assertEqual(decoded_meta["NOMINAL_DEPTH"]["standard_name"], "depth") - - # alternative way to access the metadata - # Create a dictionary where keys are the names and values are the elements - schema_dict = {obj.name: obj for obj in parquet_meta} - self.assertEqual( - schema_dict["TEMP"].metadata.get(b"standard_name"), b"sea_water_temperature" - ) - # other way to access the metadata - schema_dict = {obj.name: obj.metadata for obj in parquet_meta} - self.assertEqual( - schema_dict["TEMP"][b"standard_name"], b"sea_water_temperature" - ) - - def test_create_csv_data_parquet(self): - # Mock the return value of self.get_partition_parameters_data() - # with patch.object(self.handler_no_schema, 'get_partition_parameters_data', return_value=["site_code"]) as mock_get_params: - # Convert the Dataset to DataFrame using preprocess_data - generator = self.handler_csv_file.preprocess_data(self.tmp_csv_path) - df, ds = next(generator) - - self.handler_csv_file.publish_cloud_optimised(df, ds) - - # Read the Parquet dataset - parquet_file_path = self.handler_csv_file.cloud_optimised_output_path - parquet_dataset = pd.read_parquet(parquet_file_path) - - # Assert the expected values in the Parquet dataset - # For example, assert that the 'site_code' column is present and contains the expected value - self.assertIn("station_name", parquet_dataset.columns) - - # Testing the metadata sidecar file - - @patch("aodn_cloud_optimised.lib.GenericParquetHandler.boto3.client") - def test_handler_main_function(self, mock_boto3_client): - # Set up mock return values and inputs - mock_s3_client = mock_boto3_client.return_value - # Mock the return value of s3.download_file to simulate file download - mock_s3_client.download_file.return_value = self.tmp_nc_path - 
self.handler.tmp_input_file = self.tmp_nc_path - # Call the get_s3_raw_obj method (which should now use the mocked behavior) - self.handler.to_cloud_optimised() - - # Read the Parquet dataset - parquet_file_path = self.handler.cloud_optimised_output_path - parquet_dataset = pd.read_parquet(parquet_file_path) - - # Assert the expected values in the Parquet dataset - self.assertNotIn("station_name", parquet_dataset.columns) - self.assertAlmostEqual(parquet_dataset["TEMP"][0], 13.2773, delta=1e-2) - - def test_dummies(self): - with self.assertRaises(ValueError): - self.handler_dummy_file.to_cloud_optimised() - - with self.assertRaises(TypeError): - self.handler_dummy_nc_file.to_cloud_optimised() - - def tearDown(self): - # Remove the temporary directory and its contents - shutil.rmtree(self.tmp_dir) - - -if __name__ == "__main__": - unittest.main() diff --git a/test_aodn_cloud_optimised/test_parquet_argo_handler.py b/test_aodn_cloud_optimised/test_parquet_argo_handler.py new file mode 100644 index 0000000..02739fb --- /dev/null +++ b/test_aodn_cloud_optimised/test_parquet_argo_handler.py @@ -0,0 +1,189 @@ +import json +import os +import unittest +from unittest.mock import patch + +import boto3 +import numpy as np +import pandas as pd +import s3fs +from moto import mock_aws +from moto.moto_server.threaded_moto_server import ThreadedMotoServer +from numpy.testing import assert_array_equal + +from aodn_cloud_optimised.lib.ArgoHandler import ArgoHandler +from aodn_cloud_optimised.lib.config import load_dataset_config +from aodn_cloud_optimised.lib.s3Tools import s3_ls + +ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) + +# Specify the filename relative to the current directory +TEST_FILE_NC = os.path.join(ROOT_DIR, "resources", "2902093_prof.nc") +TEST_FILE_BAD_GEOM_NC = os.path.join(ROOT_DIR, "resources", "5905017_prof.nc") + +DATASET_CONFIG = os.path.join(ROOT_DIR, "resources", "argo_core.json") + + +@mock_aws +class TestArgoHandler(unittest.TestCase): + def 
setUp(self): + self.BUCKET_OPTIMISED_NAME = "imos-data-lab-optimised" + self.ROOT_PREFIX_CLOUD_OPTIMISED_PATH = "testing" + self.s3 = boto3.client("s3", region_name="us-east-1") + self.s3.create_bucket(Bucket="imos-data") + self.s3.create_bucket(Bucket=self.BUCKET_OPTIMISED_NAME) + + # create moto server; needed for s3fs and parquet + self.server = ThreadedMotoServer(ip_address="127.0.0.1", port=5555) + + # TODO: use it for patching? + self.s3_fs = s3fs.S3FileSystem( + anon=False, + client_kwargs={ + "endpoint_url": "http://127.0.0.1:5555/", + "region_name": "us-east-1", + }, + ) + + self.server.start() + + # Make the "imos-data" bucket public + public_policy_imos_data = { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": "*", + "Action": "s3:GetObject", + "Resource": "arn:aws:s3:::imos-data/*", + } + ], + } + + public_policy_cloud_optimised_data = { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": "*", + "Action": "s3:GetObject", + "Resource": f"arn:aws:s3:::{self.BUCKET_OPTIMISED_NAME}/*", + } + ], + } + + self.s3.put_bucket_policy( + Bucket="imos-data", Policy=json.dumps(public_policy_imos_data) + ) + + self.s3.put_bucket_policy( + Bucket=self.BUCKET_OPTIMISED_NAME, + Policy=json.dumps(public_policy_cloud_optimised_data), + ) + + # Copy files to the mock S3 bucket + self._upload_to_s3( + "imos-data", f"good_nc_argo/{os.path.basename(TEST_FILE_NC)}", TEST_FILE_NC + ) + self._upload_to_s3( + "imos-data", + f"bad_geom_argo/{os.path.basename(TEST_FILE_BAD_GEOM_NC)}", + TEST_FILE_BAD_GEOM_NC, + ) + + self.dataset_argo_netcdf_config = load_dataset_config(DATASET_CONFIG) + + self.handler_nc_argo_file = ArgoHandler( + optimised_bucket_name=self.BUCKET_OPTIMISED_NAME, + root_prefix_cloud_optimised_path=self.ROOT_PREFIX_CLOUD_OPTIMISED_PATH, + dataset_config=self.dataset_argo_netcdf_config, + clear_existing_data=True, + force_previous_parquet_deletion=True, + cluster_mode="local", + ) + + def 
_upload_to_s3(self, bucket_name, key, file_path): + with open(file_path, "rb") as f: + self.s3.upload_fileobj(f, bucket_name, key) + + def tearDown(self): + self.server.stop() + + def test_parquet_nc_argo_handler(self): + nc_obj_ls = s3_ls("imos-data", "good_nc_argo") + + # 1st pass + with patch.object(self.handler_nc_argo_file, "s3_fs", new=self.s3_fs): + self.handler_nc_argo_file.to_cloud_optimised([nc_obj_ls[0]]) + + # 2nd pass, process the same file a second time. Should be deleted + # TODO: Not a big big deal breaker, but got an issue which should be fixed in the try except only for the unittest + # 2024-07-01 16:04:54,721 - INFO - GenericParquetHandler.py:824 - delete_existing_matching_parquet - No files to delete: GetFileInfo() yielded path 'imos-data-lab-optimised/testing/anmn_ctd_ts_fv01.parquet/site_code=SYD140/timestamp=1625097600/polygon=01030000000100000005000000000000000020624000000000008041C0000000000060634000000000008041C0000000000060634000000000000039C0000000000020624000000000000039C0000000000020624000000000008041C0/IMOS_ANMN-NSW_CDSTZ_20210429T015500Z_SYD140_FV01_SYD140-2104-SBE37SM-RS232-128_END-20210812T011500Z_C-20210827T074819Z.nc-0.parquet', which is outside base dir 's3://imos-data-lab-optimised/testing/anmn_ctd_ts_fv01.parquet/' + with patch.object(self.handler_nc_argo_file, "s3_fs", new=self.s3_fs): + self.handler_nc_argo_file.to_cloud_optimised_single(nc_obj_ls[0]) + + # read parquet + dataset_name = self.dataset_argo_netcdf_config["dataset_name"] + dname = f"s3://{self.BUCKET_OPTIMISED_NAME}/{self.ROOT_PREFIX_CLOUD_OPTIMISED_PATH}/{dataset_name}.parquet/" + + parquet_dataset = pd.read_parquet( + dname, + engine="pyarrow", + storage_options={ + "client_kwargs": {"endpoint_url": "http://127.0.0.1:5555"} + }, + ) + + self.assertEqual(parquet_dataset["filename"][0], os.path.basename(TEST_FILE_NC)) + + # this checks that bad_timestamps values are cleaned + self.assertEqual(parquet_dataset["timestamp"][0], 1356998400.0) + self.assertEqual( + 
parquet_dataset["JULD"][0], pd.Timestamp("2013-02-26 03:15:00") + ) + + # Assert the expected values in the Parquet dataset + self.assertIn("PSAL_ADJUSTED", parquet_dataset.columns) + self.assertIn("TEMP_ADJUSTED", parquet_dataset.columns) + assert_array_equal( + np.unique(parquet_dataset.PLATFORM_NUMBER.values), np.array([2902093]) + ) + + def test_parquet_nc_argo_bad_geom_handler(self): + nc_obj_ls = s3_ls("imos-data", "bad_geom_argo") + + # 1st pass + with patch.object(self.handler_nc_argo_file, "s3_fs", new=self.s3_fs): + self.handler_nc_argo_file.to_cloud_optimised_single(nc_obj_ls[0]) + + # read parquet + dataset_name = self.dataset_argo_netcdf_config["dataset_name"] + dname = f"s3://{self.BUCKET_OPTIMISED_NAME}/{self.ROOT_PREFIX_CLOUD_OPTIMISED_PATH}/{dataset_name}.parquet/" + + parquet_dataset = pd.read_parquet( + dname, + engine="pyarrow", + storage_options={ + "client_kwargs": {"endpoint_url": "http://127.0.0.1:5555"} + }, + ) + + # TODO: check the values are correct, why are the first 10 values removed? forgot! To investigate! 
+ # JULD = "2016-01-07 22:06:34", "2016-01-17 15:35:41", "2016-01-27 09:41:07", + # "2016-02-06 03:32:09", "2016-02-15 22:51:46", "2016-02-25 17:19:12", + # "2016-03-06 12:20:15", "2016-03-16 07:49:21", "2016-03-26 01:49:47", + # "2016-04-04 21:19:44", "2016-04-14 15:09:26", "2016-04-24 11:00:24", + # "2016-05-04 06:12:07", "2016-05-14 01:40:30", "2016-05-23 20:29:18", + # Assert the expected values in the Parquet dataset + self.assertEqual(parquet_dataset["timestamp"][0], 1451606400) + self.assertEqual( + parquet_dataset["JULD"][0], pd.Timestamp("2016-05-23 20:29:18") + ) + + # Assert the expected values in the Parquet dataset + self.assertIn("PSAL_ADJUSTED", parquet_dataset.columns) + self.assertIn("TEMP_ADJUSTED", parquet_dataset.columns) + + +if __name__ == "__main__": + unittest.main() diff --git a/test_aodn_cloud_optimised/test_s3_tools.py b/test_aodn_cloud_optimised/test_s3_tools.py deleted file mode 100755 index c5a9d10..0000000 --- a/test_aodn_cloud_optimised/test_s3_tools.py +++ /dev/null @@ -1,30 +0,0 @@ -import unittest -from unittest.mock import MagicMock - -from aodn_cloud_optimised.lib.s3Tools import s3_ls - - -class TestS3Ls(unittest.TestCase): - def test_s3_ls(self): - # Mocking boto3 client - boto3_client_mock = MagicMock() - boto3_client_mock.get_paginator.return_value.paginate.return_value = [ - { - "Contents": [ - {"Key": "prefix/file1.nc"}, - {"Key": "prefix/file2.nc"}, - {"Key": "prefix/file3.txt"}, - ] - } - ] - - with unittest.mock.patch("boto3.client", return_value=boto3_client_mock): - # Call the function - result = s3_ls("test-bucket", "prefix") - - # Assert the result - self.assertEqual(result, ["prefix/file1.nc", "prefix/file2.nc"]) - - -if __name__ == "__main__": - unittest.main() diff --git a/test_aodn_cloud_optimised/test_s3tools.py b/test_aodn_cloud_optimised/test_s3tools.py new file mode 100755 index 0000000..8eb3d68 --- /dev/null +++ b/test_aodn_cloud_optimised/test_s3tools.py @@ -0,0 +1,114 @@ +import json +import unittest + 
+import boto3 +from moto import mock_aws +from moto.moto_server.threaded_moto_server import ThreadedMotoServer + +from aodn_cloud_optimised.lib.s3Tools import ( + s3_ls, + create_fileset, + split_s3_path, + delete_objects_in_prefix, + prefix_exists, +) +from unittest.mock import patch, MagicMock, Mock +import s3fs + + +@mock_aws() +class TestS3Tools(unittest.TestCase): + def setUp(self): + self.server = ThreadedMotoServer(ip_address="127.0.0.1", port=5555) + self.server.start() + + # Create a mock S3 service + self.s3 = boto3.client("s3", region_name="us-east-1") + self.bucket_name = "test-bucket" + self.s3.create_bucket(Bucket=self.bucket_name) + + # Make the "imos-data" bucket public + public_policy_imos_data = { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": "*", + "Action": "s3:GetObject", + "Resource": f"arn:aws:s3:::{self.bucket_name}/*", + } + ], + } + + self.s3.put_bucket_policy( + Bucket=self.bucket_name, Policy=json.dumps(public_policy_imos_data) + ) + + # Copy files to the mock S3 bucket + self.s3.put_object( + Bucket=self.bucket_name, Key="prefix/file1.nc", Body="" + ) # empty file + self.s3.put_object( + Bucket=self.bucket_name, Key="prefix/file2.nc", Body="" + ) # empty file + + def tearDown(self): + self.server.stop() + + def test_s3_ls(self): + result = s3_ls(self.bucket_name, "prefix") + + self.assertEqual( + result, + ["s3://test-bucket/prefix/file1.nc", "s3://test-bucket/prefix/file2.nc"], + ) + + def test_split(self): + bucket_name, key = split_s3_path("s3://test-bucket/prefix/file1.nc") + self.assertEqual(bucket_name, "test-bucket") + self.assertEqual(key, "prefix/file1.nc") + + def test_prefix_exists(self): + self.assertTrue(prefix_exists("s3://test-bucket/prefix/file1.nc")) + + def test_delete_objects_in_prefix(self): + delete_objects_in_prefix("test-bucket", "prefix") + result = s3_ls(self.bucket_name, "prefix") + self.assertListEqual(result, []) + + @patch("s3fs.S3FileSystem") + def 
test_create_fileset(self, MockS3FileSystem): + MockS3FileSystem.return_value = s3fs.S3FileSystem( + anon=False, client_kwargs={"endpoint_url": "http://127.0.0.1:5555/"} + ) + + fileset = create_fileset("s3://test-bucket/prefix/file1.nc") + self.assertEqual(fileset[0].path, "test-bucket/prefix/file1.nc") + + @patch.object(s3fs.S3FileSystem, "open") + def test_create_fileset(self, mock_open): + # Prepare a list of mock objects for each file path + mock_files = [] + s3_paths = [ + "s3://test-bucket/prefix/file1.nc", + "s3://test-bucket/prefix/file2.nc", + ] + + for s3_path in s3_paths: + mock_file = Mock() + mock_file.path = s3_path + mock_files.append(mock_file) + + # Configure mock_open to return the appropriate mock file for each call + mock_open.side_effect = mock_files + + # Call the function under test + fileset = create_fileset(s3_paths) + + # Assert that each file in the fileset has the correct path + for idx, file_obj in enumerate(fileset): + self.assertEqual(file_obj.path, s3_paths[idx]) + + +if __name__ == "__main__": + unittest.main() diff --git a/test_aodn_cloud_optimised/test_schema.py b/test_aodn_cloud_optimised/test_schema.py index d36a737..1930286 100644 --- a/test_aodn_cloud_optimised/test_schema.py +++ b/test_aodn_cloud_optimised/test_schema.py @@ -1,22 +1,79 @@ +import json import unittest from unittest.mock import patch, mock_open, MagicMock -import os -import json import pyarrow as pa import xarray as xr +import os +import boto3 +from moto import mock_aws +from moto.moto_server.threaded_moto_server import ThreadedMotoServer +from aodn_cloud_optimised.lib.s3Tools import s3_ls +import s3fs from aodn_cloud_optimised.lib.schema import ( - generate_pyarrow_schema_from_s3_netcdf, - create_pyarrow_schema_from_list, create_pyrarrow_schema_from_dict, create_pyarrow_schema, generate_json_schema_from_s3_netcdf, + generate_json_schema_var_from_netcdf, ) +ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) + +# Specify the filename relative to the current 
directory +TEST_FILE_NC_ANMN = os.path.join( + ROOT_DIR, + "resources", + "IMOS_ANMN-NSW_CDSTZ_20210429T015500Z_SYD140_FV01_SYD140-2104-SBE37SM-RS232-128_END-20210812T011500Z_C-20210827T074819Z.nc", +) +TEST_FILE_NC_ANMN_SCHEMA = TEST_FILE_NC_ANMN.replace(".nc", ".schema") + + +@mock_aws() class TestNetCDFSchemaGeneration(unittest.TestCase): def setUp(self): + # Create a mock S3 service + self.s3 = boto3.client("s3", region_name="us-east-1") + self.bucket_name = "imos-data" + self.s3.create_bucket(Bucket=self.bucket_name) + + # Make the "imos-data" bucket public + public_policy_imos_data = { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": "*", + "Action": "s3:GetObject", + "Resource": f"arn:aws:s3:::{self.bucket_name}/*", + } + ], + } + + self.s3.put_bucket_policy( + Bucket=self.bucket_name, Policy=json.dumps(public_policy_imos_data) + ) + + # Copy files to the mock S3 bucket + self._upload_to_s3( + self.bucket_name, + f"good_nc_anmn/{os.path.basename(TEST_FILE_NC_ANMN)}", + TEST_FILE_NC_ANMN, + ) + + self.nc_s3_path = s3_ls("imos-data", "good_nc_anmn")[0] + + self.server = ThreadedMotoServer(ip_address="127.0.0.1", port=5555) + self.server.start() + self.s3_fs = s3fs.S3FileSystem( + anon=False, + client_kwargs={ + "endpoint_url": "http://127.0.0.1:5555/", + "region_name": "us-east-1", + }, + ) + # Define some sample data for testing self.schema_list = [ "temperature: double", @@ -38,56 +95,13 @@ def setUp(self): } self.sub_schema_strings = ["filename: string", "site_code: string"] - self.sub_schema = create_pyarrow_schema_from_list(self.sub_schema_strings) - - @patch("aodn_cloud_optimised.lib.schema.xr.open_dataset") - @patch("aodn_cloud_optimised.lib.schema.s3fs.S3FileSystem") - def test_generate_pyarrow_schema_from_s3_netcdf( - self, mock_s3fs, mock_xr_open_dataset - ): - # Mock S3 file access and Xarray open_dataset function - mock_s3 = MagicMock() - mock_s3.open.return_value = mock_open(read_data=b"dummy_data").return_value 
- mock_s3fs.return_value = mock_s3 - - # Mock Xarray open_dataset function to return a dummy dataset. We really don't care at this stage of the values - dummy_dataset = xr.Dataset( - { - "temperature": xr.DataArray([1, 2, 3]), - "humidity": xr.DataArray([4, 5, 6]), - "pressure": xr.DataArray([7, 8, 9]), - "timestamp": xr.DataArray([10, 11, 12]), - "location": xr.DataArray(["A", "B", "C"]), - } - ) - mock_xr_open_dataset.return_value = dummy_dataset - - # Test the function with mocked S3 file access - result_schema = generate_pyarrow_schema_from_s3_netcdf( - "dummy_s3_path", self.sub_schema - ) - # Check if the result pyarrow_schema is an instance of PyArrow pyarrow_schema - self.assertIsInstance(result_schema, pa.Schema) - - # You can add more assertions based on your expected behavior + def tearDown(self): + self.server.stop() - def test_create_schema_from_list(self): - # Test pyarrow_schema creation from pyarrow_schema strings - result_schema = create_pyarrow_schema_from_list(self.schema_list) - - # Check if the result pyarrow_schema is an instance of PyArrow pyarrow_schema - self.assertIsInstance(result_schema, pa.Schema) - - # Check if the pyarrow_schema fields match the expected fields - expected_fields = [ - pa.field("temperature", pa.float64()), - pa.field("humidity", pa.float32()), - pa.field("pressure", pa.int32()), - pa.field("timestamp", pa.timestamp("ns")), - pa.field("location", pa.string()), - ] - self.assertEqual(result_schema, pa.schema(expected_fields)) + def _upload_to_s3(self, bucket_name, key, file_path): + with open(file_path, "rb") as f: + self.s3.upload_fileobj(f, bucket_name, key) def test_create_schema_from_dict(self): # Test pyarrow_schema creation from pyarrow_schema strings @@ -121,43 +135,53 @@ def test_create_schema(self): result_schema = create_pyarrow_schema(self.schema_list) self.assertEqual(result_schema, pa.schema(expected_fields)) - @patch("aodn_cloud_optimised.lib.schema.xr.open_dataset") - 
@patch("aodn_cloud_optimised.lib.schema.s3fs.S3FileSystem") - def test_generate_json_schema_from_s3_netcdf(self, mock_s3fs, mock_xr_open_dataset): - # Mock the S3 file system - mock_s3 = MagicMock() - mock_s3fs.return_value = mock_s3 - - # Mock the open_dataset method to return a dataset - mock_dataset = MagicMock() - mock_xr_open_dataset.return_value = mock_dataset - - # Define mock dataset variables and coordinates - mock_dataset.variables = { - "lon": MagicMock(dtype="float32", attrs={"units": "degrees_east"}), - "lat": MagicMock(dtype="float32", attrs={"units": "degrees_north"}), - "time": MagicMock( - dtype="datetime64[ns]", attrs={"units": "seconds since 1970-01-01"} - ), + def test_generate_json_schema_from_s3_netcdf(self): + def assert_file_contents_equal(file1_path, file2_path): + with open(file1_path, "r") as f1, open(file2_path, "r") as f2: + content1 = f1.read() + content2 = f2.read() + assert ( + content1 == content2 + ), f"File contents do not match: {file1_path} != {file2_path}" + + try: + loaded_schema_file = generate_json_schema_from_s3_netcdf( + self.nc_s3_path, s3_fs=self.s3_fs + ) + assert_file_contents_equal(loaded_schema_file, TEST_FILE_NC_ANMN_SCHEMA) + + finally: + os.remove(loaded_schema_file) + + def test_generate_json_schema_var_from_netcdf(self): + + json_expected = { + "TEMP": { + "type": "float32", + "ancillary_variables": "TEMP_quality_control", + "long_name": "sea_water_temperature", + "standard_name": "sea_water_temperature", + "units": "degrees_Celsius", + "valid_max": 40.0, + "valid_min": -2.5, + } } - mock_dataset.coords = {} - # Call the function under test - temp_file_path = generate_json_schema_from_s3_netcdf( - "s3://your-bucket/path/to/file.nc" - ) + # Test from a local file + json_output = generate_json_schema_var_from_netcdf(TEST_FILE_NC_ANMN, "TEMP") + self.assertEqual(json_output, json.dumps(json_expected, indent=2)) - # Load the generated JSON schema - with open(temp_file_path, "r") as json_file: - loaded_schema = 
json.load(json_file) + # Test from a s3fs file + json_output = generate_json_schema_var_from_netcdf( + self.s3_fs.open(self.nc_s3_path), "TEMP", s3_fs=self.s3_fs + ) + self.assertEqual(json_output, json.dumps(json_expected, indent=2)) - # Assert the content of the loaded schema - expected_schema = { - "lon": {"type": "float", "units": "degrees_east"}, - "lat": {"type": "float", "units": "degrees_north"}, - "time": {"type": "timestamp[ns]", "units": "seconds since 1970-01-01"}, - } - self.assertEqual(loaded_schema, expected_schema) + # Test from a s3 file + json_output = generate_json_schema_var_from_netcdf( + self.nc_s3_path, "TEMP", s3_fs=self.s3_fs + ) + self.assertEqual(json_output, json.dumps(json_expected, indent=2)) if __name__ == "__main__": diff --git a/test_aodn_cloud_optimised/test_soopxbtnrt.py b/test_aodn_cloud_optimised/test_soopxbtnrt.py deleted file mode 100644 index 76b1083..0000000 --- a/test_aodn_cloud_optimised/test_soopxbtnrt.py +++ /dev/null @@ -1,177 +0,0 @@ -import os -import shutil -import tempfile -import unittest -import yaml -from unittest.mock import patch, MagicMock - -import pandas as pd -from numpy.testing import assert_array_equal - -from aodn_cloud_optimised.lib.GenericParquetHandler import GenericHandler -from aodn_cloud_optimised.lib.config import load_dataset_config - -ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) - -# Specify the filename relative to the current directory -TEST_FILE_NC = os.path.join( - ROOT_DIR, "resources", "IMOS_SOOP-XBT_T_20240218T141800Z_IX28_FV00_ID_9797539.nc" -) - -CONFIG_JSON = os.path.join(ROOT_DIR, "resources", "soop_xbt_nrt.json") - - -class TestGenericHandler(unittest.TestCase): - @patch( - "aodn_cloud_optimised.lib.GenericParquetHandler.GenericHandler.get_s3_raw_obj" - ) - def setUp(self, mock_get_s3_raw_obj): - # Create a temporary directory - self.tmp_dir = tempfile.mkdtemp() - - # Copy the test NetCDF file to the temporary directory - self.tmp_nc_path = os.path.join(self.tmp_dir, 
os.path.basename(TEST_FILE_NC)) - shutil.copy(TEST_FILE_NC, self.tmp_nc_path) - - dataset_config = load_dataset_config(CONFIG_JSON) - - dataset_config_no_schema = { - "dataset_name": "dummy_table_name", - "cloud_optimised_format": "parquet", - "metadata_uuid": "35234913-aa3c-48ec-b9a4-77f822f66ef8", - "gattrs_to_variables": ["XBT_line", "ship_name", "Callsign", "imo_number"], - "partition_keys": ["XBT_line", "timestamp"], - "time_extent": {"time": "TIME", "partition_timestamp_period": "M"}, - "spatial_extent": { - "lat": "LATITUDE", - "lon": "LONGITUDE", - "spatial_resolution": 5, - }, - "schema": {}, - "dataset_gattrs": {"title": "SOOP XBT NRT"}, - "force_old_pq_del": False, - } - - self.handler = GenericHandler( - raw_bucket_name="dummy_raw_bucket", - optimised_bucket_name="dummy_optimised_bucket", - input_object_key=os.path.basename(self.tmp_nc_path), - dataset_config=dataset_config_no_schema, - force_old_pq_del=False, - ) - - self.handler_with_schema = GenericHandler( - raw_bucket_name="dummy_raw_bucket", - optimised_bucket_name="dummy_optimised_bucket", - input_object_key=os.path.basename(self.tmp_nc_path), - dataset_config=dataset_config, - ) - - # modify the path of the parquet dataset output - self.handler.cloud_optimised_output_path = os.path.join( - self.tmp_dir, "dummy_dataset_name" - ) - self.handler_with_schema.cloud_optimised_output_path = os.path.join( - self.tmp_dir, "dummy_dataset_name" - ) - - # Create a mock object for xr.open_dataset - self.mock_open_dataset = MagicMock() - - # test method inherited from super - @patch( - "aodn_cloud_optimised.lib.GenericParquetHandler.GenericHandler.get_s3_raw_obj" - ) - def test_get_s3_raw_obj(self, mock_get_s3_raw_obj): - with patch("aodn_cloud_optimised.lib.GenericParquetHandler.boto3.client"): - mock_get_s3_raw_obj.return_value = self.tmp_nc_path - tmp_filepath = self.handler.get_s3_raw_obj() - self.assertEqual(tmp_filepath, self.tmp_nc_path) - - @patch( - 
"aodn_cloud_optimised.lib.GenericParquetHandler.GenericHandler.get_s3_raw_obj" - ) - def test_data_to_df_ds(self, mock_get_s3_raw_obj): - # Configure the mock object to return the path of the copied NetCDF file - mock_get_s3_raw_obj.return_value = self.tmp_nc_path - - # Call the preprocess_data method - generator = self.handler.preprocess_data(self.tmp_nc_path) - df, ds = next(generator) - - # Assert that ds.site_code is equal to the expected value - assert_array_equal(ds.XBT_line, "IX28") - - def test_add_columns_df_and_bad_timestamps(self): - # with patch.object(self.handler_no_schema, 'get_partition_parameters_data', return_value=["XBT_line"]) as mock_get_params: - # Convert the Dataset to DataFrame using preprocess_data - generator = self.handler.preprocess_data(self.tmp_nc_path) - df, ds = next(generator) - - # Call the method to add columns to the DataFrame - result_df = self.handler._add_timestamp_df(df) - result_df = self.handler._add_columns_df(result_df, ds) - - # now we call the next function to remove the bad timestamp values ( which does also a reindexing) - result_df = self.handler._rm_bad_timestamp_df(result_df) - self.assertEqual(result_df["timestamp"][0], 1706745600.0) - self.assertEqual(result_df.index.name, "DEPTH") - self.assertEqual( - result_df.filename[0], - "IMOS_SOOP-XBT_T_20240218T141800Z_IX28_FV00_ID_9797539.nc", - ) - - def test_create_data_parquet_with_mocked_parameters_and_with_schema(self): - # Mock the return value of self.get_partition_parameters_data() - # with patch.object(self.handler, 'get_partition_parameters_data', return_value=["XBT_line"]) as mock_get_params: - # Convert the Dataset to DataFrame using preprocess_data - generator = self.handler_with_schema.preprocess_data(self.tmp_nc_path) - df, ds = next(generator) - - self.handler_with_schema.publish_cloud_optimised(df, ds) - - # Read the Parquet dataset - parquet_file_path = self.handler_with_schema.cloud_optimised_output_path - parquet_dataset = 
pd.read_parquet(parquet_file_path) - - # Assert the expected values in the Parquet dataset - self.assertNotIn( - "DUMMY_VAR_NOT_IN", parquet_dataset.columns - ) # make sure the variable is removed - self.assertIn("TEMP", parquet_dataset.columns) - - self.assertEqual(parquet_dataset["timestamp"][0], 1706745600.0) - self.assertEqual( - parquet_dataset["TIME"][0], pd.Timestamp("2024-02-18 14:18:00") - ) - - def test_push_metadata_aws_registry(self): - - # Load the registry config from the JSON file - expected_yaml_data = yaml.dump( - self.handler_with_schema.dataset_config["aws_opendata_registry"] - ) - - with patch("boto3.client") as mock_client: - mock_s3_client = MagicMock() - mock_client.return_value = mock_s3_client - - self.handler_with_schema.push_metadata_aws_registry() - - # Assert that put_object was called with the correct parameters - # expected_key = os.path.join(self.tmp_dir, 'dummy_dataset_name/soop_xbt_nrt.yaml') - expected_key = os.path.join("parquet/loz_test/soop_xbt_nrt.yaml") - - mock_s3_client.put_object.assert_called_once_with( - Bucket=self.handler_with_schema.optimised_bucket_name, - Key=expected_key, - Body=expected_yaml_data.encode("utf-8"), - ) - - def tearDown(self): - # Remove the temporary directory and its contents - shutil.rmtree(self.tmp_dir) - - -if __name__ == "__main__": - unittest.main()