From 9f0a45f20fbf685890672cb636844d16a35227d0 Mon Sep 17 00:00:00 2001 From: Eike Middell Date: Mon, 16 Dec 2024 16:18:26 +0100 Subject: [PATCH 1/2] handle more corner cases in _read_str --- snirf/pysnirf2.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/snirf/pysnirf2.py b/snirf/pysnirf2.py index 044c81c..4dfd2a1 100644 --- a/snirf/pysnirf2.py +++ b/snirf/pysnirf2.py @@ -365,17 +365,29 @@ def _read_string(dataset: h5py.Dataset) -> str: """ if type(dataset) is not h5py.Dataset: raise TypeError("'dataset' must be type h5py.Dataset") + # Because many SNIRF files are saved with string values in length 1 arrays - try: - if dataset.ndim > 0: - return str(dataset[0].decode('ascii')) + if dataset.ndim > 0: + if len(dataset) > 0: + tentative_str = dataset[0] else: - return str(dataset[()].decode('ascii')) - except AttributeError: # If we expected a string and got something else, `decode` isn't there + return "" + else: + tentative_str = dataset[()] + + + if hasattr(tentative_str, 'decode'): + # tentative_str is a byte str -> decode + return str(tentative_str.decode('ascii')) + elif isinstance(tentative_str, np.ndarray) and tentative_str.dtype == np.dtype("S1"): + # tentative_str is a numpy array of bytes -> join -> decode + tentative_str = b''.join(tentative_str) + return str(tentative_str.decode('ascii')) + else: warn( 'Expected dataset {} to be stringlike, is {} conversion may be incorrect' .format(dataset.name, dataset.dtype), SnirfFormatError) - return str(dataset[0]) + return str(tentative_str) def _read_int(dataset: h5py.Dataset) -> int: From 41c111479f58072efa30a7fd46d38150853a136e Mon Sep 17 00:00:00 2001 From: Eike Middell Date: Tue, 17 Dec 2024 09:30:04 +0100 Subject: [PATCH 2/2] for empty int/float/str datasets return None --- snirf/pysnirf2.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/snirf/pysnirf2.py b/snirf/pysnirf2.py index 4dfd2a1..10565e5 100644 --- a/snirf/pysnirf2.py +++ b/snirf/pysnirf2.py @@ -355,7 +355,7 @@ def _read_dataset(dataset: h5py.Dataset): ])) -def _read_string(dataset: h5py.Dataset) -> str: +def _read_string(dataset: h5py.Dataset) -> str | None: """Reads the contents of an `h5py.Dataset` to a `str`. Args: @@ -371,7 +371,7 @@ def _read_string(dataset: h5py.Dataset) -> str: if len(dataset) > 0: tentative_str = dataset[0] else: - return "" + return None else: tentative_str = dataset[()] @@ -390,7 +390,7 @@ def _read_string(dataset: h5py.Dataset) -> str: return str(tentative_str) -def _read_int(dataset: h5py.Dataset) -> int: +def _read_int(dataset: h5py.Dataset) -> int | None: """Reads the contents of an `h5py.Dataset` to an `int`. Args: @@ -401,12 +401,15 @@ def _read_int(dataset: h5py.Dataset) -> int: if type(dataset) is not h5py.Dataset: raise TypeError("'dataset' must be type h5py.Dataset") if dataset.ndim > 0: - return int(dataset[0]) + if len(dataset) > 0: + return int(dataset[0]) + else: + return None else: return int(dataset[()]) -def _read_float(dataset: h5py.Dataset) -> float: +def _read_float(dataset: h5py.Dataset) -> float | None: """Reads the contents of an `h5py.Dataset` to a `float`. Args: @@ -417,7 +420,10 @@ def _read_float(dataset: h5py.Dataset) -> float: if type(dataset) is not h5py.Dataset: raise TypeError("'dataset' must be type h5py.Dataset") if dataset.ndim > 0: - return float(dataset[0]) + if len(dataset) > 0: + return float(dataset[0]) + else: + return None else: return float(dataset[()])