From bdaf90e3339ab9b516b25cb4b26bce641c6ed918 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maximilian=20N=C3=B6the?= <maximilian.noethe@tu-dortmund.de>
Date: Fri, 19 May 2017 13:12:48 +0200
Subject: [PATCH 1/7] Use pyfact for run group and add azimuth

---
 erna/datacheck.py           |  3 +--
 erna/scripts/gather_fits.py | 38 +++++++++++++------------------------
 setup.py                    |  4 ++--
 3 files changed, 16 insertions(+), 29 deletions(-)

diff --git a/erna/datacheck.py b/erna/datacheck.py
index 22078dd..a7109ab 100644
--- a/erna/datacheck.py
+++ b/erna/datacheck.py
@@ -1,13 +1,12 @@
-from .utils import load_config
 import pandas as pd
 
-
 default_columns = (
     'fNight AS night',
     'fRunID AS run_id',
     'fSourceName AS source',
     'TIMESTAMPDIFF(SECOND, fRunStart, fRunStop) * fEffectiveOn AS ontime',
     'fZenithDistanceMean AS zenith',
+    'fAzimuthMean AS azimuth',
     'fRunStart AS run_start',
     'fRunStop AS run_stop',
 )
diff --git a/erna/scripts/gather_fits.py b/erna/scripts/gather_fits.py
index 3bd4750..f1b7523 100644
--- a/erna/scripts/gather_fits.py
+++ b/erna/scripts/gather_fits.py
@@ -5,7 +5,7 @@
 import sys
 import os
 import numpy as np
-from fact.io import append_to_h5py, initialize_h5py
+from fact.io import to_h5py
 
 from ..automatic_processing.database import (
     database, Job, RawDataFile, Jar, XML, ProcessingState
@@ -113,36 +113,24 @@ def main(xml_name, ft_version, outputfile, config, start, end, source, datacheck
         len(jobs), jobs.ontime.sum()/3600
     ))
 
-    runs_array = np.core.rec.fromarrays(
-        [
-            successful_jobs['night'],
-            successful_jobs['run_id'],
-            successful_jobs['source'].values.astype('S'),
-            successful_jobs['ontime'],
-            successful_jobs['zenith'],
-            successful_jobs['run_start'].values.astype('S'),
-            successful_jobs['run_stop'].values.astype('S'),
-        ],
-        names=(
-            'night',
-            'run_id',
-            'source',
-            'ontime',
-            'zenith',
-            'run_start',
-            'run_stop',
-        )
-    )
-
     if os.path.isfile(outputfile):
         a = input('Outputfile exists! Overwrite? [y, N]: ')
         if not a.lower().startswith('y'):
             sys.exit()
 
-    with h5py.File(outputfile, 'w') as f:
-        initialize_h5py(f, dtypes=runs_array.dtype, key='runs')
-        append_to_h5py(f, runs_array, key='runs')
+    columns = [
+        'night',
+        'run_id',
+        'source',
+        'ontime',
+        'zenith',
+        'azimuth',
+        'run_start',
+        'run_stop',
+    ]
+    to_h5py(outputfile, successful_jobs[columns], key='runs', mode='w')
 
+    with h5py.File(outputfile, 'a') as f:
         f['runs'].attrs['datacheck'] = ' AND '.join(conditions)
 
     write_fits_to_hdf5(outputfile, successful_jobs.result_file, mode='a')
diff --git a/setup.py b/setup.py
index 7ba7ec8..d2ce618 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@
 
 setup(
     name='erna',
-    version='0.3.0',
+    version='0.4.0',
     description='Easy RuN Access. Tools that help to do batch processing of FACT data',
     url='https://github.com/fact-project/erna',
     author='Kai Brügge, Jens Buss, Maximilian Nöthe',
@@ -26,7 +26,7 @@
         'PyMySQL',          # in anaconda
         'pytz',             # in anaconda
         'tables',           # needs to be installed by pip for some reason
-        'pyfact>=0.9.4',
+        'pyfact>=0.10.5',
         'astropy',
         'h5py',
         # 'hdf5',

From dd9de00355f09e63ac36e60af92fb6a06e32f959 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maximilian=20N=C3=B6the?= <maximilian.noethe@tu-dortmund.de>
Date: Fri, 19 May 2017 13:13:40 +0200
Subject: [PATCH 2/7] Rename columns to snake case and add theta_deg columns

---
 erna/hdf_utils.py | 39 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 37 insertions(+), 2 deletions(-)

diff --git a/erna/hdf_utils.py b/erna/hdf_utils.py
index 74e643f..10aeae0 100644
--- a/erna/hdf_utils.py
+++ b/erna/hdf_utils.py
@@ -4,12 +4,33 @@
 from tqdm import tqdm
 import sys
 from fact.io import append_to_h5py, initialize_h5py
+from fact.instrument import camera_distance_mm_to_deg
+import re
+from numpy.lib import recfunctions
 
 log = logging.getLogger(__name__)
 
 native_byteorder = {'little': '<', 'big': '>'}[sys.byteorder]
 
 
+theta_columns = tuple(
+    ['theta'] + ['theta_off_{}'.format(i) for i in range(1, 6)]
+)
+
+theta_deg_columns = tuple(
+    ['theta_deg'] + ['theta_deg_off_{}'.format(i) for i in range(1, 6)]
+)
+
+snake_re_1 = re.compile('(.)([A-Z][a-z]+)')
+snake_re_2 = re.compile('([a-z0-9])([A-Z])')
+
+
+def camel2snake(key):
+    ''' see http://stackoverflow.com/a/1176023/3838691 '''
+    s1 = snake_re_1.sub(r'\1_\2', key)
+    return snake_re_2.sub(r'\1_\2', s1).lower().replace('__', '_')
+
+
 def write_fits_to_hdf5(
         outputfile,
         inputfiles,
@@ -28,13 +49,27 @@ def write_fits_to_hdf5(
                 if len(f) < 2:
                     continue
 
+                array = f[1].data[:]
+
+                # convert all names to snake case
+                array.dtype.names = list(map(camel2snake, array.dtype.names))
+
+                # add columns with theta in degrees
+                for in_col, out_col in zip(theta_columns, theta_deg_columns):
+                    if in_col in array.dtype.names:
+                        recfunctions.append_fields(
+                            array,
+                            out_col,
+                            camera_distance_mm_to_deg(in_col)
+                        )
+
                 if not initialized:
                     initialize_h5py(
                         hdf_file,
-                        f[1].data.dtype,
+                        array.data.dtype,
                         key=key,
                         compression=compression,
                     )
                     initialized = True
 
-                append_to_h5py(hdf_file, f[1].data, key=key)
+                append_to_h5py(hdf_file, array, key=key)

From 7c294f7906e36bb60881709c4fd8c609b808afc3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maximilian=20N=C3=B6the?= <maximilian.noethe@tu-dortmund.de>
Date: Fri, 19 May 2017 13:22:43 +0200
Subject: [PATCH 3/7] Rename RUNID to run_id

---
 erna/hdf_utils.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/erna/hdf_utils.py b/erna/hdf_utils.py
index 10aeae0..d88573e 100644
--- a/erna/hdf_utils.py
+++ b/erna/hdf_utils.py
@@ -25,12 +25,19 @@
 snake_re_2 = re.compile('([a-z0-9])([A-Z])')
 
 
+renames = {'RUNID': 'run_id'}
+
+
 def camel2snake(key):
     ''' see http://stackoverflow.com/a/1176023/3838691 '''
     s1 = snake_re_1.sub(r'\1_\2', key)
     return snake_re_2.sub(r'\1_\2', s1).lower().replace('__', '_')
 
 
+def rename_columns(columns):
+    return [camel2snake(renames.get(col, col)) for col in columns]
+
+
 def write_fits_to_hdf5(
         outputfile,
         inputfiles,
@@ -52,7 +59,7 @@ def write_fits_to_hdf5(
                 array = f[1].data[:]
 
                 # convert all names to snake case
-                array.dtype.names = list(map(camel2snake, array.dtype.names))
+                array.dtype.names = rename_columns(array.dtype.names)
 
                 # add columns with theta in degrees
                 for in_col, out_col in zip(theta_columns, theta_deg_columns):

From 33301605182503cfaaacaa2e7e73e196b9356de8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maximilian=20N=C3=B6the?= <maximilian.noethe@tu-dortmund.de>
Date: Fri, 19 May 2017 13:30:03 +0200
Subject: [PATCH 4/7] Remove m_ prefix and .f_ infix from column names

---
 erna/hdf_utils.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/erna/hdf_utils.py b/erna/hdf_utils.py
index d88573e..c7542fd 100644
--- a/erna/hdf_utils.py
+++ b/erna/hdf_utils.py
@@ -31,7 +31,9 @@
 def camel2snake(key):
     ''' see http://stackoverflow.com/a/1176023/3838691 '''
     s1 = snake_re_1.sub(r'\1_\2', key)
-    return snake_re_2.sub(r'\1_\2', s1).lower().replace('__', '_')
+    s2 = snake_re_2.sub(r'\1_\2', s1).lower().replace('__', '_')
+    s3 = re.sub('^m_', '', s2)
+    return s3.replace('.f_', '_')
 
 
 def rename_columns(columns):

From 9a83552e44d34d0dec5a4af95b1b9fa813586877 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maximilian=20N=C3=B6the?= <maximilian.noethe@tu-dortmund.de>
Date: Fri, 19 May 2017 13:38:02 +0200
Subject: [PATCH 5/7] Fix conversion of theta

---
 erna/hdf_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/erna/hdf_utils.py b/erna/hdf_utils.py
index c7542fd..9fb4c75 100644
--- a/erna/hdf_utils.py
+++ b/erna/hdf_utils.py
@@ -69,7 +69,7 @@ def write_fits_to_hdf5(
                         recfunctions.append_fields(
                             array,
                             out_col,
-                            camera_distance_mm_to_deg(in_col)
+                            camera_distance_mm_to_deg(array[in_col])
                         )
 
                 if not initialized:

From 90a84da08fe3cb7355d4c345dc86c7f118e7134d Mon Sep 17 00:00:00 2001
From: Maximilian Noethe <maximilian.noethe@tu-dortmund.de>
Date: Fri, 19 May 2017 13:54:53 +0200
Subject: [PATCH 6/7] Fix to hdf conversion

---
 erna/automatic_processing/xmls/test.xml | 69 +++++++++++++++++++++++++
 erna/hdf_utils.py                       |  7 +--
 2 files changed, 73 insertions(+), 3 deletions(-)
 create mode 100644 erna/automatic_processing/xmls/test.xml

diff --git a/erna/automatic_processing/xmls/test.xml b/erna/automatic_processing/xmls/test.xml
new file mode 100644
index 0000000..d7b0618
--- /dev/null
+++ b/erna/automatic_processing/xmls/test.xml
@@ -0,0 +1,69 @@
+<container>
+
+  <properties url="classpath:/default/settings.properties" />
+
+  <property name="infile" value="file:src/main/resources/testDataFile.fits.gz" />
+  <property name="drsfile" value="file:src/main/resources/testDrsFile.drs.fits.gz" />
+
+  <property name="integralGainFile" value="classpath:/default/gain_sorted_20131127.csv" />
+  <property name="pixelDelayFile" value="classpath:/default/delays_lightpulser_20150217.csv" />
+
+  <property name="output_basename" value="output" />
+
+  <!-- <property name="fits_features" value="${keysForOutput}"/> -->
+  <property name="json_features" value="PhotonArrivals,PhotonArrivalsBaseLine"/>
+
+  <property name="aux_dir" value="file:src/main/resources/aux/" />
+  <service id="auxService" class="fact.auxservice.AuxFileService" auxFolder="${aux_dir}" />
+
+  <service id="calibService" class="fact.calibrationservice.ConstantCalibService" />
+
+  <stream id="fact" class="fact.io.zfits.ZFitsStream"  url="${infile}" limit="50"/>
+
+  <process id="2" input="fact">
+  <!-- prevEventAndSkip: -->
+  <!-- PreviousEventInfo, Skip(no Data Trigger) -->
+  <include url="classpath:/default/data/prevEventAndSkip.xml" />
+  <!-- Output: Data -->
+
+  <!-- Calibration: -->
+  <!-- DrsCalibration, PatchJumpRemoval, RemoveSpikes,
+  DrsTimeCalibration, ArrayTimeCorrection, InterpolateBadPixel -->
+  <include url="classpath:/default/data/calibration.xml" />
+  <!-- Output: DataCalibrated -->
+
+  <fact.extraction.SinglePulseExtraction
+          dataKey="DataCalibrated"
+          outputKey="PhotonArrivals"
+  />
+
+  <!-- Extraction -->
+  <!-- BasicExtraction, RisingEdgeForPositions, RisingEdgePolynomFit, TimeOverThreshold,
+  PhotonChargeTimeOverThreshold, HandleSaturation, CorrectPixelDelays-->
+  <include url="classpath:/default/data/extraction.xml" />     
+  <!-- Output: photoncharge, arrivalTime -->   
+
+  <!-- Cleaning -->
+  <!-- SourcePosition(Cetatauri), CoreNeighborCleanTimeNeighbor-->
+  <include url="classpath:/default/data/cleaning.xml" />     
+  <!-- Output: shower -->   
+
+  <!-- Parameter calculation (only source independent) -->
+  <!-- ArrayMean(photoncharge,arrivalTime), ArrayStatistics(photoncharge,arrivalTime,maxSlopes,
+  arrivalTimePos,maxSlopesPos,maxAmplitudePosition,photonchargeSaturated,arrivalTimeTOT), 
+  Size, DistributionFromShower, M3Long, Length, Width, NumberOfIslands, TimeGraident,
+  Concentration, ConcentrationCore, ConcentrationAtCenterOfGravity, Leakage, TimeSpread,
+  ShowerSlope, Disp -->
+  <include url="classpath:/default/data/parameterCalc.xml" /> 
+  <!-- Output: source independent parameters -->   
+
+  <!-- Parameter calculation (only source dependent) -->
+  <!-- SourcePosition(${sourcename}), AntiSourcePosition(5), Alpha(for 6 Sources),
+  Distance(for 6 Sources), CosDeltaAlpha(for 6 Sources), Theta(for 6 Sources) -->
+  <include url="classpath:/default/data/sourceParameter.xml" /> 
+  <!-- Output: source dependent parameters -->
+
+  <fact.io.FITSWriter url="file:facttools_output/${output_basename}_level2.fits" keys="${keysForOutput}" />
+  <fact.io.JSONWriter url="file:facttools_output/${output_basename}_singlepe.jsonl.gz" keys="${json_features}" jsonl="true" gzip="true"/>
+  </process>
+</container>
diff --git a/erna/hdf_utils.py b/erna/hdf_utils.py
index 9fb4c75..6da988d 100644
--- a/erna/hdf_utils.py
+++ b/erna/hdf_utils.py
@@ -7,6 +7,7 @@
 from fact.instrument import camera_distance_mm_to_deg
 import re
 from numpy.lib import recfunctions
+import numpy as np
 
 log = logging.getLogger(__name__)
 
@@ -25,7 +26,7 @@
 snake_re_2 = re.compile('([a-z0-9])([A-Z])')
 
 
-renames = {'RUNID': 'run_id'}
+renames = {'RUNID': 'run_id', 'COGx': 'cog_x', 'COGy': 'cog_y'}
 
 
 def camel2snake(key):
@@ -58,7 +59,7 @@ def write_fits_to_hdf5(
                 if len(f) < 2:
                     continue
 
-                array = f[1].data[:]
+                array = np.array(f[1].data[:])
 
                 # convert all names to snake case
                 array.dtype.names = rename_columns(array.dtype.names)
@@ -75,7 +76,7 @@ def write_fits_to_hdf5(
                 if not initialized:
                     initialize_h5py(
                         hdf_file,
-                        array.data.dtype,
+                        array.dtype,
                         key=key,
                         compression=compression,
                     )

From d10c2d7917af268e43d0fca66fe439ae8e252a45 Mon Sep 17 00:00:00 2001
From: Maximilian Noethe <maximilian.noethe@tu-dortmund.de>
Date: Fri, 19 May 2017 13:56:13 +0200
Subject: [PATCH 7/7] Remove accidentally committed file

---
 erna/automatic_processing/xmls/test.xml | 69 -------------------------
 1 file changed, 69 deletions(-)
 delete mode 100644 erna/automatic_processing/xmls/test.xml

diff --git a/erna/automatic_processing/xmls/test.xml b/erna/automatic_processing/xmls/test.xml
deleted file mode 100644
index d7b0618..0000000
--- a/erna/automatic_processing/xmls/test.xml
+++ /dev/null
@@ -1,69 +0,0 @@
-<container>
-
-  <properties url="classpath:/default/settings.properties" />
-
-  <property name="infile" value="file:src/main/resources/testDataFile.fits.gz" />
-  <property name="drsfile" value="file:src/main/resources/testDrsFile.drs.fits.gz" />
-
-  <property name="integralGainFile" value="classpath:/default/gain_sorted_20131127.csv" />
-  <property name="pixelDelayFile" value="classpath:/default/delays_lightpulser_20150217.csv" />
-
-  <property name="output_basename" value="output" />
-
-  <!-- <property name="fits_features" value="${keysForOutput}"/> -->
-  <property name="json_features" value="PhotonArrivals,PhotonArrivalsBaseLine"/>
-
-  <property name="aux_dir" value="file:src/main/resources/aux/" />
-  <service id="auxService" class="fact.auxservice.AuxFileService" auxFolder="${aux_dir}" />
-
-  <service id="calibService" class="fact.calibrationservice.ConstantCalibService" />
-
-  <stream id="fact" class="fact.io.zfits.ZFitsStream"  url="${infile}" limit="50"/>
-
-  <process id="2" input="fact">
-  <!-- prevEventAndSkip: -->
-  <!-- PreviousEventInfo, Skip(no Data Trigger) -->
-  <include url="classpath:/default/data/prevEventAndSkip.xml" />
-  <!-- Output: Data -->
-
-  <!-- Calibration: -->
-  <!-- DrsCalibration, PatchJumpRemoval, RemoveSpikes,
-  DrsTimeCalibration, ArrayTimeCorrection, InterpolateBadPixel -->
-  <include url="classpath:/default/data/calibration.xml" />
-  <!-- Output: DataCalibrated -->
-
-  <fact.extraction.SinglePulseExtraction
-          dataKey="DataCalibrated"
-          outputKey="PhotonArrivals"
-  />
-
-  <!-- Extraction -->
-  <!-- BasicExtraction, RisingEdgeForPositions, RisingEdgePolynomFit, TimeOverThreshold,
-  PhotonChargeTimeOverThreshold, HandleSaturation, CorrectPixelDelays-->
-  <include url="classpath:/default/data/extraction.xml" />     
-  <!-- Output: photoncharge, arrivalTime -->   
-
-  <!-- Cleaning -->
-  <!-- SourcePosition(Cetatauri), CoreNeighborCleanTimeNeighbor-->
-  <include url="classpath:/default/data/cleaning.xml" />     
-  <!-- Output: shower -->   
-
-  <!-- Parameter calculation (only source independent) -->
-  <!-- ArrayMean(photoncharge,arrivalTime), ArrayStatistics(photoncharge,arrivalTime,maxSlopes,
-  arrivalTimePos,maxSlopesPos,maxAmplitudePosition,photonchargeSaturated,arrivalTimeTOT), 
-  Size, DistributionFromShower, M3Long, Length, Width, NumberOfIslands, TimeGraident,
-  Concentration, ConcentrationCore, ConcentrationAtCenterOfGravity, Leakage, TimeSpread,
-  ShowerSlope, Disp -->
-  <include url="classpath:/default/data/parameterCalc.xml" /> 
-  <!-- Output: source independent parameters -->   
-
-  <!-- Parameter calculation (only source dependent) -->
-  <!-- SourcePosition(${sourcename}), AntiSourcePosition(5), Alpha(for 6 Sources),
-  Distance(for 6 Sources), CosDeltaAlpha(for 6 Sources), Theta(for 6 Sources) -->
-  <include url="classpath:/default/data/sourceParameter.xml" /> 
-  <!-- Output: source dependent parameters -->
-
-  <fact.io.FITSWriter url="file:facttools_output/${output_basename}_level2.fits" keys="${keysForOutput}" />
-  <fact.io.JSONWriter url="file:facttools_output/${output_basename}_singlepe.jsonl.gz" keys="${json_features}" jsonl="true" gzip="true"/>
-  </process>
-</container>