Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into fix_checkavailability
Browse files Browse the repository at this point in the history
  • Loading branch information
maxnoe committed May 9, 2017
2 parents 36cd9f7 + de81bf9 commit 8a7edb0
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 84 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ In the main you can choose the parameters for your search (at the moment: the be
The parameter *timedelta_in_minutes* determines the maximum allowed time lag between the timestamp of the data-file and the timestamp of the appropriate drs-file. The default value is 30 minutes, which works fine in my experience.
Dates are given in the usual FACT convention: YYYYMMDD.

##Requirements
## Requirements
- FACT-Tools
- Java 1.8+
- Python 3.5+ (srsly. 3.5 please)
Expand Down
84 changes: 5 additions & 79 deletions erna/hdf_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,93 +3,20 @@
from astropy.io import fits
from tqdm import tqdm
import sys
from fact.io import append_to_h5py, initialize_h5py

log = logging.getLogger(__name__)

native_byteorder = {'little': '<', 'big': '>'}[sys.byteorder]


def initialize_hdf5(f, dtypes, groupname='events', **kwargs):
    '''
    Create a group named `groupname` holding one empty, resizable
    dataset per field of `dtypes`.

    Each dataset starts with zero rows and is unlimited along the first
    axis (maxshape None), so rows can be appended later via resize.

    Parameters
    ----------
    f: h5py.File
        the hdf5 file, opened either in write or append mode
    dtypes: numpy.dtype
        the numpy dtype object of a record or structured array describing
        the columns
    groupname: str
        the name for the hdf5 group to hold all datasets, default: events
    **kwargs:
        forwarded to `create_dataset` (e.g. compression options)

    Returns
    -------
    the newly created h5py group
    '''
    group = f.create_group(groupname)

    for column in dtypes.names:
        field = dtypes[column]

        # first axis grows without bound; trailing axes are fixed by the field shape
        group.create_dataset(
            column,
            shape=(0,) + field.shape,
            maxshape=(None,) + field.shape,
            dtype=field.base,
            **kwargs
        )

    return group


def append_to_hdf5(f, array, groupname='events'):
    '''
    Append a numpy record or structured array to the given hdf5 file.
    The file should have been previously initialized with initialize_hdf5.

    Each column of `array` is appended to the dataset of the same name:
    the dataset is grown along its first axis and the new rows are
    written into the freed slots. Data not in native byte order is
    byteswapped first, since hdf5 stores native-order values.

    Parameters
    ----------
    f: h5py.File
        the hdf5 file, opened either in write or append mode
    array: numpy.array or numpy.recarray
        the numpy array to append
    groupname: str
        the name for the hdf5 group with the corresponding data sets

    Raises
    ------
    NotImplementedError
        if a column has more than two dimensions
    '''
    native = {'little': '<', 'big': '>'}[sys.byteorder]
    group = f.get(groupname)

    for column in array.dtype.names:
        dataset = group.get(column)
        new_rows = array[column]

        old_len = dataset.shape[0]
        dataset.resize(old_len + new_rows.shape[0], axis=0)

        # hdf5 wants native byte order; swap if the input is foreign
        if new_rows.dtype.byteorder in ('=', native):
            payload = new_rows
        else:
            payload = new_rows.newbyteorder().byteswap()

        if payload.ndim == 1:
            dataset[old_len:] = payload
        elif payload.ndim == 2:
            dataset[old_len:, :] = payload
        else:
            raise NotImplementedError('Only 1d and 2d arrays are supported at this point')


def write_fits_to_hdf5(
outputfile,
inputfiles,
mode='a',
compression='gzip',
progress=True,
groupname='events',
key='events',
):

initialized = False
Expand All @@ -102,13 +29,12 @@ def write_fits_to_hdf5(
continue

if not initialized:
initialize_hdf5(
initialize_h5py(
hdf_file,
f[1].data.dtype,
groupname=groupname,
key=key,
compression=compression,
)
initialized = True


append_to_hdf5(hdf_file, f[1].data, groupname=groupname)
append_to_h5py(hdf_file, f[1].data, key=key)
7 changes: 4 additions & 3 deletions erna/scripts/gather_fits.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@
import sys
import os
import numpy as np
from fact.io import append_to_h5py, initialize_h5py

from ..automatic_processing.database import (
database, Job, RawDataFile, Jar, XML, ProcessingState
)
from ..utils import load_config, create_mysql_engine
from ..hdf_utils import write_fits_to_hdf5, append_to_hdf5, initialize_hdf5
from ..hdf_utils import write_fits_to_hdf5
from ..datacheck import get_runs
from ..datacheck_conditions import conditions as datacheck_conditions

Expand Down Expand Up @@ -139,8 +140,8 @@ def main(xml_name, ft_version, outputfile, config, start, end, source, datacheck
sys.exit()

with h5py.File(outputfile, 'w') as f:
initialize_hdf5(f, dtypes=runs_array.dtype, groupname='runs')
append_to_hdf5(f, runs_array, groupname='runs')
initialize_h5py(f, dtypes=runs_array.dtype, key='runs')
append_to_h5py(f, runs_array, key='runs')

f['runs'].attrs['datacheck'] = ' AND '.join(conditions)

Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

setup(
name='erna',
version='0.2.1',
version='0.3.0',
description='Easy RuN Access. Tools that help to do batch processing of FACT data',
url='https://github.com/fact-project/erna',
author='Kai Brügge, Jens Buss, Maximilian Nöthe',
Expand All @@ -26,6 +26,7 @@
'PyMySQL', # in anaconda
'pytz', # in anaconda
'tables', # needs to be installed by pip for some reason
'pyfact>=0.9.4',
'astropy',
'h5py',
# 'hdf5',
Expand Down

0 comments on commit 8a7edb0

Please sign in to comment.