Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into fix_checkavailability
Browse files Browse the repository at this point in the history
  • Loading branch information
maxnoe committed May 9, 2017
2 parents 36cd9f7 + de81bf9 commit 8a7edb0
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 84 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ In the main you can choose the parameters for your search (at the moment: the be
The parameter *timedelta_in_minutes* determines the maximum allowed time lag between the timestamp of the data-file and the timestamp of the appropriate drs-file. The default value is 30 minutes, which works fine in my experience.
Dates are given in the usual FACT convention: YYYYMMDD.

##Requirements
## Requirements
- FACT-Tools
- Java 1.8+
- Python 3.5+ (srsly. 3.5 please)
Expand Down
84 changes: 5 additions & 79 deletions erna/hdf_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,93 +3,20 @@
from astropy.io import fits
from tqdm import tqdm
import sys
from fact.io import append_to_h5py, initialize_h5py

log = logging.getLogger(__name__)

native_byteorder = {'little': '<', 'big': '>'}[sys.byteorder]


def initialize_hdf5(f, dtypes, groupname='events', **kwargs):
    '''
    Create a group named `groupname` holding one empty, resizable
    dataset per field of `dtypes`.

    Each dataset starts with zero rows and is unlimited along the first
    axis (maxshape None), so rows can be appended later via resize.

    Parameters
    ----------
    f: h5py.File
        the hdf5 file, opened either in write or append mode
    dtypes: numpy.dtype
        the numpy dtype object of a record or structured array describing
        the columns
    groupname: str
        the name for the hdf5 group to hold all datasets, default: events
    **kwargs:
        forwarded to `create_dataset` (e.g. compression options)

    Returns
    -------
    the newly created h5py group
    '''
    group = f.create_group(groupname)

    for column in dtypes.names:
        field = dtypes[column]

        # first axis grows without bound; trailing axes are fixed by the field shape
        group.create_dataset(
            column,
            shape=(0,) + field.shape,
            maxshape=(None,) + field.shape,
            dtype=field.base,
            **kwargs
        )

    return group


def append_to_hdf5(f, array, groupname='events'):
    '''
    Append a numpy record or structured array to the given hdf5 file.
    The file should have been previously initialized with initialize_hdf5.

    Each column of `array` is appended to the dataset of the same name:
    the dataset is grown along its first axis and the new rows are
    written into the freed slots. Data not in native byte order is
    byteswapped first, since hdf5 stores native-order values.

    Parameters
    ----------
    f: h5py.File
        the hdf5 file, opened either in write or append mode
    array: numpy.array or numpy.recarray
        the numpy array to append
    groupname: str
        the name for the hdf5 group with the corresponding data sets

    Raises
    ------
    NotImplementedError
        if a column has more than two dimensions
    '''
    native = {'little': '<', 'big': '>'}[sys.byteorder]
    group = f.get(groupname)

    for column in array.dtype.names:
        dataset = group.get(column)
        new_rows = array[column]

        old_len = dataset.shape[0]
        dataset.resize(old_len + new_rows.shape[0], axis=0)

        # hdf5 wants native byte order; swap if the input is foreign
        if new_rows.dtype.byteorder in ('=', native):
            payload = new_rows
        else:
            payload = new_rows.newbyteorder().byteswap()

        if payload.ndim == 1:
            dataset[old_len:] = payload
        elif payload.ndim == 2:
            dataset[old_len:, :] = payload
        else:
            raise NotImplementedError('Only 1d and 2d arrays are supported at this point')


def write_fits_to_hdf5(
outputfile,
inputfiles,
mode='a',
compression='gzip',
progress=True,
groupname='events',
key='events',
):

initialized = False
Expand All @@ -102,13 +29,12 @@ def write_fits_to_hdf5(
continue

if not initialized:
initialize_hdf5(
initialize_h5py(
hdf_file,
f[1].data.dtype,
groupname=groupname,
key=key,
compression=compression,
)
initialized = True


append_to_hdf5(hdf_file, f[1].data, groupname=groupname)
append_to_h5py(hdf_file, f[1].data, key=key)
7 changes: 4 additions & 3 deletions erna/scripts/gather_fits.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@
import sys
import os
import numpy as np
from fact.io import append_to_h5py, initialize_h5py

from ..automatic_processing.database import (
database, Job, RawDataFile, Jar, XML, ProcessingState
)
from ..utils import load_config, create_mysql_engine
from ..hdf_utils import write_fits_to_hdf5, append_to_hdf5, initialize_hdf5
from ..hdf_utils import write_fits_to_hdf5
from ..datacheck import get_runs
from ..datacheck_conditions import conditions as datacheck_conditions

Expand Down Expand Up @@ -139,8 +140,8 @@ def main(xml_name, ft_version, outputfile, config, start, end, source, datacheck
sys.exit()

with h5py.File(outputfile, 'w') as f:
initialize_hdf5(f, dtypes=runs_array.dtype, groupname='runs')
append_to_hdf5(f, runs_array, groupname='runs')
initialize_h5py(f, dtypes=runs_array.dtype, key='runs')
append_to_h5py(f, runs_array, key='runs')

f['runs'].attrs['datacheck'] = ' AND '.join(conditions)

Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

setup(
name='erna',
version='0.2.1',
version='0.3.0',
description='Easy RuN Access. Tools that help to do batch processing of FACT data',
url='https://github.com/fact-project/erna',
author='Kai Brügge, Jens Buss, Maximilian Nöthe',
Expand All @@ -26,6 +26,7 @@
'PyMySQL', # in anaconda
'pytz', # in anaconda
'tables', # needs to be installed by pip for some reason
'pyfact>=0.9.4',
'astropy',
'h5py',
# 'hdf5',
Expand Down

0 comments on commit 8a7edb0

Please sign in to comment.