From 9f0a45f20fbf685890672cb636844d16a35227d0 Mon Sep 17 00:00:00 2001
From: Eike Middell <eike@middell.net>
Date: Mon, 16 Dec 2024 16:18:26 +0100
Subject: [PATCH 1/2] handle more corner cases in _read_string

---
 snirf/pysnirf2.py | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/snirf/pysnirf2.py b/snirf/pysnirf2.py
index 044c81c..4dfd2a1 100644
--- a/snirf/pysnirf2.py
+++ b/snirf/pysnirf2.py
@@ -365,17 +365,29 @@ def _read_string(dataset: h5py.Dataset) -> str:
     """
     if type(dataset) is not h5py.Dataset:
         raise TypeError("'dataset' must be type h5py.Dataset")
+    
     # Because many SNIRF files are saved with string values in length 1 arrays
-    try:
-        if dataset.ndim > 0:
-            return str(dataset[0].decode('ascii'))
+    if dataset.ndim > 0:
+        if len(dataset) > 0:
+            tentative_str = dataset[0]
         else:
-            return str(dataset[()].decode('ascii'))
-    except AttributeError:  # If we expected a string and got something else, `decode` isn't there
+            return ""
+    else:
+       tentative_str = dataset[()]
+
+    
+    if hasattr(tentative_str, 'decode'):
+        # tentative_str is a byte str -> decode
+        return str(tentative_str.decode('ascii'))
+    elif isinstance(tentative_str, np.ndarray) and tentative_str.dtype == np.dtype("S1"):
+        # tentative_str is a numpy array of bytes -> join -> decode
+        tentative_str = b''.join(tentative_str)
+        return str(tentative_str.decode('ascii'))
+    else:
         warn(
             'Expected dataset {} to be stringlike, is {} conversion may be incorrect'
             .format(dataset.name, dataset.dtype), SnirfFormatError)
-        return str(dataset[0])
+        return str(tentative_str)
 
 
 def _read_int(dataset: h5py.Dataset) -> int:

From 41c111479f58072efa30a7fd46d38150853a136e Mon Sep 17 00:00:00 2001
From: Eike Middell <eike@middell.net>
Date: Tue, 17 Dec 2024 09:30:04 +0100
Subject: [PATCH 2/2] for empty int/float/str datasets return None

---
 snirf/pysnirf2.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/snirf/pysnirf2.py b/snirf/pysnirf2.py
index 4dfd2a1..10565e5 100644
--- a/snirf/pysnirf2.py
+++ b/snirf/pysnirf2.py
@@ -355,7 +355,7 @@ def _read_dataset(dataset: h5py.Dataset):
         ]))
 
 
-def _read_string(dataset: h5py.Dataset) -> str:
+def _read_string(dataset: h5py.Dataset) -> str | None:
     """Reads the contents of an `h5py.Dataset` to a `str`.
 
     Args:
@@ -371,7 +371,7 @@ def _read_string(dataset: h5py.Dataset) -> str:
         if len(dataset) > 0:
             tentative_str = dataset[0]
         else:
-            return ""
+            return None
     else:
        tentative_str = dataset[()]
 
@@ -390,7 +390,7 @@ def _read_string(dataset: h5py.Dataset) -> str:
         return str(tentative_str)
 
 
-def _read_int(dataset: h5py.Dataset) -> int:
+def _read_int(dataset: h5py.Dataset) -> int | None:
     """Reads the contents of an `h5py.Dataset` to an `int`.
 
     Args:
@@ -401,12 +401,15 @@ def _read_int(dataset: h5py.Dataset) -> int:
     if type(dataset) is not h5py.Dataset:
         raise TypeError("'dataset' must be type h5py.Dataset")
     if dataset.ndim > 0:
-        return int(dataset[0])
+        if len(dataset) > 0:
+            return int(dataset[0])
+        else:
+            return None
     else:
         return int(dataset[()])
 
 
-def _read_float(dataset: h5py.Dataset) -> float:
+def _read_float(dataset: h5py.Dataset) -> float | None:
     """Reads the contents of an `h5py.Dataset` to a `float`.
 
     Args:
@@ -417,7 +420,10 @@ def _read_float(dataset: h5py.Dataset) -> float:
     if type(dataset) is not h5py.Dataset:
         raise TypeError("'dataset' must be type h5py.Dataset")
     if dataset.ndim > 0:
-        return float(dataset[0])
+        if len(dataset) > 0:
+            return float(dataset[0])
+        else:
+            return None
     else:
         return float(dataset[()])