
Create hourly product in moorings products pipeline #206

Merged: 11 commits, Feb 24, 2020
3 changes: 1 addition & 2 deletions .bumpversion.cfg
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 1.0.14
current_version = 1.1.0
commit = False
tag = False
tag_name = {new_version}
@@ -25,4 +25,3 @@ replace = __version__ = '{new_version}'
[bumpversion:file:setup.py]
search = version='{current_version}',
replace = version='{new_version}',

130 changes: 92 additions & 38 deletions aodndata/moorings/products_handler.py
@@ -2,7 +2,7 @@
import os
import re

from owslib.fes import PropertyIsEqualTo, PropertyIsNotEqualTo, And
from owslib.fes import PropertyIsEqualTo, PropertyIsNotEqualTo, And, Or

from aodncore.pipeline import HandlerBase, PipelineFilePublishType, PipelineFile, FileType
from aodncore.pipeline.exceptions import (InvalidFileContentError, InvalidFileNameError, InvalidFileFormatError,
@@ -11,11 +11,31 @@
from aodncore.util.wfs import ogc_filter_to_string

from aodntools.timeseries_products.aggregated_timeseries import main_aggregator
from aodntools.timeseries_products.hourly_timeseries import hourly_aggregator

from aodndata.moorings.classifiers import MooringsFileClassifier


AGGREGATED_VARIABLE_PATTERN = re.compile(r'FV01_([A-Z0-9-]+)-aggregated')
PRODUCT_TYPE_PATTERN = re.compile(r'FV0[12]_([^_]+)_END')
VALID_PRODUCTS = {'aggregated', 'hourly'}
DOWNLOAD_URL_PREFIX = "https://s3-ap-southeast-2.amazonaws.com/imos-data/"
OPENDAP_URL_PREFIX = "http://thredds.aodn.org.au/thredds/dodsC/"


def get_product_type(file_path):
"""Return a product type label for the given file (extracted from the file name).
For example "PSAL-aggregated-timeseries", or "hourly-timeseries".

:param file_path: str path or name of file
:returns: str product type label
"""
file_name = os.path.basename(file_path)
name_match = PRODUCT_TYPE_PATTERN.search(file_name)
if not name_match:
raise InvalidFileNameError(
"Could not extract produt type from '{file_name}'".format(file_name=file_name)
)
return name_match.group(1)
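# A quick sanity check of PRODUCT_TYPE_PATTERN (a minimal sketch; the file names are
# taken from the test fixtures added in this PR):
#
#     >>> get_product_type('IMOS_ANMN-NRS_TZ_20081120_NRSROT_FV01_DOX1-2-aggregated-timeseries_END-20190523_C-20190819.nc')
#     'DOX1-2-aggregated-timeseries'
#     >>> get_product_type('IMOS_ANMN-NRS_STZ_20081120_NRSROT_FV02_hourly-timeseries_END-20190523_C-20191010.nc')
#     'hourly-timeseries'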


class MooringsProductClassifier(MooringsFileClassifier):
@@ -92,9 +112,11 @@ def __init__(self, *args, **kwargs):
self.allowed_extensions = ['.json_manifest', '.nc', '.zip']
self.product_site_code = None
self.product_variables = None
self.products_to_create = VALID_PRODUCTS
self.input_file_collection = None
self.input_file_variables = None
self.excluded_files = dict()
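# Each inner list is the set of QC flags to include in one hourly product: (1, 2) for
# quality-controlled data only, and (0, 1, 2) to also include non-QC'd data
# (flag meanings assumed from the IMOS QC convention).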
self.product_qc_flags = [[1, 2], [0, 1, 2]]

def _read_manifest(self):
"""Read the manifest file and extract key parameters for product"""
@@ -108,6 +130,14 @@ def _read_manifest(self):
raise InvalidFileContentError(
"manifest file '{self.input_file}' missing information (site_code, variables)".format(self=self)
)
if 'products' in manifest:
invalid_products = set(manifest['products']) - VALID_PRODUCTS
if invalid_products:
raise InvalidFileContentError(
"invalid product(s) {invalid_products} requested "
"in manifest file '{self.input_file}'".format(invalid_products=invalid_products, self=self)
)
self.products_to_create = set(manifest['products'])
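# A minimal example of a .json_manifest file accepted by this handler (hypothetical
# values; site_code and variables are required, products is optional and defaults to
# all of VALID_PRODUCTS):
#
#     {
#         "site_code": "NRSROT",
#         "variables": ["TEMP", "PSAL"],
#         "products": ["hourly"]
#     }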

def get_wfs_features(self, filter_list, propertyname='*'):
"""Query the file index WFS layer with the given filters and return a list of features.
@@ -118,11 +148,6 @@ def get_wfs_features(self, filter_list, propertyname='*'):
"""

ogc_filter = ogc_filter_to_string(And(filter_list))

# Note I need to access _wfs_broker to be able to use query_urls_for_layer() with a filter,
# as the corresponding StateQuery method doesn't accept additional kwargs.
# TODO: find out why this calls getCapabilities twice (and takes 40s even when response mocked with httpretty)
# TODO: replace ._wfs_broker.getfeature_dict() with .getfeature_dict() once aodncore has been updated
wfs_response = self.state_query.query_wfs_getfeature_dict(typename=[self.FILE_INDEX_LAYER],
filter=ogc_filter,
propertyname=propertyname
@@ -164,34 +189,42 @@ def _get_input_files(self):
# TODO: Replace temp_dir above with cache_dir?

def _get_old_product_files(self):
"""Get a list of the currently published aggregated_timeseries files for the site being processed."""
"""Get a list of the currently published product files for the site being processed."""

filter_list = [PropertyIsEqualTo(propertyname='site_code', literal=self.product_site_code),
PropertyIsEqualTo(propertyname='data_category', literal='aggregated_timeseries')
Or([PropertyIsEqualTo(propertyname='data_category', literal='aggregated_timeseries'),
PropertyIsEqualTo(propertyname='data_category', literal='hourly_timeseries'),
PropertyIsEqualTo(propertyname='data_category', literal='gridded_timeseries')
])
]
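# gridded_timeseries is queried here too, so any previously published gridded products
# are also tracked (assumed groundwork for a product type this handler does not yet generate).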
wfs_features = self.get_wfs_features(filter_list, propertyname=['url'])

self.old_product_files = {}
for f in wfs_features:
product_url = f['properties']['url']
var_match = AGGREGATED_VARIABLE_PATTERN.search(product_url)
if not var_match:
raise InvalidFileNameError(
"Could not determine variable of interest for '{product_url}'".format(product_url=product_url)
)
variable_of_interest = var_match.group(1).replace('-', '_')
if variable_of_interest not in self.old_product_files:
self.old_product_files[variable_of_interest] = [product_url]
product_type = get_product_type(product_url)
if product_type not in self.old_product_files:
self.old_product_files[product_type] = [product_url]
else:
self.old_product_files[variable_of_interest].append(product_url)
self.old_product_files[product_type].append(product_url)

self.logger.info(
"Old file for {variable_of_interest}: '{product_url}'".format(variable_of_interest=variable_of_interest,
product_url=product_url)
)
"Old file for {product_type}: '{product_url}'".format(product_type=product_type,
product_url=product_url)
)

def _handle_errors(self, errors):
"""Keep track of any input files that were excluded from the product and log a brief warning."""
if errors:
self.logger.warning("{n} files were excluded from the product.".format(n=len(errors)))
for f, e in errors.items():
if f not in self.excluded_files:
self.excluded_files[f] = set(e)
else:
self.excluded_files[f].update(e)
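# The errors argument maps each excluded input file to the reasons for its exclusion,
# e.g. (hypothetical) {'IMOS_..._FV01_...nc': ['no NOMINAL_DEPTH variable']}; the reasons
# are accumulated into sets above, so duplicates across products are not double-counted.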

def _make_aggregated_timeseries(self):
"""For each variable, generate product and add to file_collection."""
"""For each variable, generate aggregated timeseries product and add to file_collection."""

for var in self.product_variables:
# Filter input_list to the files relevant for this var
Expand All @@ -204,31 +237,49 @@ def _make_aggregated_timeseries(self):

product_url, errors = main_aggregator(input_list, var, self.product_site_code, input_dir=self.temp_dir,
output_dir=self.products_dir,
download_url_prefix="https://s3-ap-southeast-2.amazonaws.com/imos-data/",
opendap_url_prefix="http://thredds.aodn.org.au/thredds/dodsC/"
download_url_prefix=DOWNLOAD_URL_PREFIX,
opendap_url_prefix=OPENDAP_URL_PREFIX
)
if errors:
self.logger.warning("{n} files were excluded from the aggregation.".format(n=len(errors)))
for f, e in errors.items():
if f not in self.excluded_files:
self.excluded_files[f] = set(e)
else:
self.excluded_files[f].update(e)
self._handle_errors(errors)

product_file = PipelineFile(product_url, file_update_callback=self._file_update_callback)
product_file.publish_type = PipelineFilePublishType.HARVEST_UPLOAD
self.file_collection.add(product_file)

self._cleanup_previous_version(product_file.name)

def _make_hourly_timeseries(self):
"""Generate hourly products for the site and add to file_collection."""

# The hourly products use all input files for the site (no per-variable filtering)
input_list = [f.local_path for f in self.input_file_collection]
self.logger.info("Creating hourly products from {n} input files".format(n=len(input_list)))

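# One product is generated per QC flag set (see product_qc_flags in __init__),
# i.e. two hourly files per site, each replacing its previously published version.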
for qc_flags in self.product_qc_flags:

product_url, errors = hourly_aggregator(input_list, self.product_site_code, qc_flags,
input_dir=self.temp_dir,
output_dir=self.products_dir,
download_url_prefix=DOWNLOAD_URL_PREFIX,
opendap_url_prefix=OPENDAP_URL_PREFIX
)

self._handle_errors(errors)

product_file = PipelineFile(product_url, file_update_callback=self._file_update_callback)
product_file.publish_type = PipelineFilePublishType.HARVEST_UPLOAD
self.file_collection.add(product_file)

self._cleanup_previous_version(product_file.name, var)
self._cleanup_previous_version(product_file.name)

def _cleanup_previous_version(self, product_name, var):
"""Delete any previously published version(s) of the product for this variable file.
def _cleanup_previous_version(self, product_name):
"""Delete any previously published version(s) of the given product file.
Ignores cases where the previous version has exactly the same file name, as this will simply be overwritten.

:param product_name: Name of the newly generated product
:param var: Name of the variable of interest
"""
for old_product_url in self.old_product_files.get(var, []):
product_type = get_product_type(product_name)
for old_product_url in self.old_product_files.get(product_type, []):
if os.path.basename(old_product_url) != product_name:
old_file = PipelineFile(old_product_url, dest_path=old_product_url, is_deletion=True,
late_deletion=True, file_update_callback=self._file_update_callback)
@@ -253,11 +304,14 @@ def preprocess(self):

# TODO: Run compliance checks and remove non-compliant files from the input list (log them).

self._make_aggregated_timeseries()
if 'aggregated' in self.products_to_create:
self._make_aggregated_timeseries()
if 'hourly' in self.products_to_create:
self._make_hourly_timeseries()

# TODO: Include the list of excluded files as another table in the notification email (instead of the log)
if self.excluded_files:
self.logger.warning("Files exluded from aggregations:")
self.logger.warning("Files exluded from some of the products generated:")
for f, e in self.excluded_files.items():
self.logger.warning("'{f}': {e}".format(f=f, e=list(e)))

2 changes: 1 addition & 1 deletion aodndata/version.py
@@ -1 +1 @@
__version__ = '1.0.14'
__version__ = '1.1.0'
4 changes: 2 additions & 2 deletions setup.py
@@ -54,7 +54,7 @@

INSTALL_REQUIRES = [
'aodncore>=1.0.0',
'aodntools>=1.0.0',
'aodntools>=1.1.0',
'cc-plugin-imos>=1.3.0',
'fiona>=1.8.8',
'matplotlib>=3.0.3',
@@ -80,7 +80,7 @@

setup(
name=PACKAGE_NAME,
version='1.0.14',
version='1.1.0',
packages=find_packages(exclude=PACKAGE_EXCLUDES),
url='https://github.com/aodn',
license='GPLv3',
9 changes: 9 additions & 0 deletions test_aodndata/moorings/getFeature_empty.json
@@ -0,0 +1,9 @@
{
"type": "FeatureCollection",
"features": [],
"totalFeatures": 0,
"numberMatched": 0,
"numberReturned": 0,
"timeStamp": "2020-02-19T00:00:00Z",
"crs": null
}
14 changes: 11 additions & 3 deletions test_aodndata/moorings/getFeature_old_products.json
@@ -24,11 +24,19 @@
"properties": {
"url": "IMOS/ANMN/NRS/NRSROT/aggregated_timeseries/IMOS_ANMN-NRS_TZ_20081120_NRSROT_FV01_DOX1-2-aggregated-timeseries_END-20190523_C-20190819.nc"
}
},
{
"type": "Feature",
"id": "moorings_all_map.fid--44e8da32_16d0014d48c_7b20",
"geometry": null,
"properties": {
"url": "IMOS/ANMN/NRS/NRSROT/hourly_timeseries/IMOS_ANMN-NRS_STZ_20081120_NRSROT_FV02_hourly-timeseries_END-20190523_C-20191010.nc"
}
}
],
"totalFeatures": 3,
"numberMatched": 3,
"numberReturned": 3,
"totalFeatures": 4,
"numberMatched": 4,
"numberReturned": 4,
"timeStamp": "2019-12-05T06:33:53.380Z",
"crs": null
}