diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 47f2d8ee19a..01e3bd1cf1d 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -1316,7 +1316,14 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable: units = encoding.pop("units", None) calendar = encoding.pop("calendar", None) dtype = encoding.pop("dtype", None) + + # in the case of packed data we need to encode into + # float first, the correct dtype will be established + # via CFScaleOffsetCoder/CFMaskCoder + if "add_offset" in encoding or "scale_factor" in encoding: + dtype = data.dtype if data.dtype.kind == "f" else "float64" (data, units, calendar) = encode_cf_datetime(data, units, calendar, dtype) + # if no dtype is provided, preserve data.dtype in encoding if dtype is None: safe_setitem(encoding, "dtype", data.dtype, name=name) @@ -1371,17 +1378,20 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable: if np.issubdtype(variable.data.dtype, np.timedelta64): dims, data, attrs, encoding = unpack_for_encoding(variable) + dtype = encoding.pop("dtype", None) + # in the case of packed data we need to encode into # float first, the correct dtype will be established # via CFScaleOffsetCoder/CFMaskCoder - dtype = None if "add_offset" in encoding or "scale_factor" in encoding: - encoding.pop("dtype") dtype = data.dtype if data.dtype.kind == "f" else "float64" - data, units = encode_cf_timedelta( - data, encoding.pop("units", None), encoding.get("dtype", dtype) - ) + data, units = encode_cf_timedelta(data, encoding.pop("units", None), dtype) + + # if no dtype is provided, preserve data.dtype in encoding + if dtype is None: + safe_setitem(encoding, "dtype", data.dtype, name=name) + safe_setitem(attrs, "units", units, name=name) return Variable(dims, data, attrs, encoding, fastpath=True) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 72125da3332..77e2f0602ce 100644 --- a/xarray/coding/variables.py +++ 
b/xarray/coding/variables.py @@ -484,25 +484,30 @@ def decode(self, variable: Variable, name: T_Name = None): ) if encoded_fill_values: - # special case DateTime to properly handle NaT - # we need to check if time-like will be decoded or not - # in further processing dtype: np.typing.DTypeLike decoded_fill_value: Any - is_time_like = _is_time_like(attrs.get("units")) - if ( - (is_time_like == "datetime" and self.decode_times) - or (is_time_like == "timedelta" and self.decode_timedelta) - ) and data.dtype.kind in "iu": - dtype, decoded_fill_value = np.int64, np.iinfo(np.int64).min + # packed data always has to be decoded into float, + # whether or not the units are time-like + if "scale_factor" in attrs or "add_offset" in attrs: + dtype, decoded_fill_value = ( + _choose_float_dtype(data.dtype, attrs), + np.nan, + ) else: - if "scale_factor" not in attrs and "add_offset" not in attrs: - dtype, decoded_fill_value = dtypes.maybe_promote(data.dtype) - else: + # without packing, special-case datetime/timedelta to properly + # handle NaT; we need to check whether time-like data will be + # decoded in further processing + is_time_like = _is_time_like(attrs.get("units")) + if ( + (is_time_like == "datetime" and self.decode_times) + or (is_time_like == "timedelta" and self.decode_timedelta) + ) and data.dtype.kind in "iu": dtype, decoded_fill_value = ( - _choose_float_dtype(data.dtype, attrs), - np.nan, - ) + np.int64, + np.iinfo(np.int64).min, + ) # np.dtype(f"{is_time_like}64[s]") + else: + dtype, decoded_fill_value = dtypes.maybe_promote(data.dtype) transform = partial( _apply_mask, diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 480bd96f3f4..8f031593a27 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -525,6 +525,24 @@ def test_decoded_cf_datetime_array_2d(time_unit: PDDatetimeUnitOptions) -> None: assert_array_equal(np.asarray(result), expected) +@pytest.mark.parametrize("decode_times", [True, False]) 
+@pytest.mark.parametrize("mask_and_scale", [True, False]) +def test_decode_datetime_mask_and_scale( + decode_times: bool, mask_and_scale: bool +) -> None: + attrs = { + "units": "nanoseconds since 1970-01-01", + "_FillValue": np.int16(-1), + "add_offset": 100000.0, + } + encoded = Variable(["time"], np.array([0, -1, 1], "int16"), attrs=attrs) + decoded = conventions.decode_cf_variable( + "foo", encoded, mask_and_scale=mask_and_scale, decode_times=decode_times + ) + result = conventions.encode_cf_variable(decoded, name="foo") + assert_equal(encoded, result) + + FREQUENCIES_TO_ENCODING_UNITS = { "ns": "nanoseconds", "us": "microseconds", @@ -1914,7 +1932,7 @@ def test_lazy_decode_timedelta_error() -> None: def test_decode_timedelta_mask_and_scale( decode_timedelta: bool, mask_and_scale: bool ) -> None: - attrs = {"units": "days", "_FillValue": np.int16(-1), "add_offset": 100.0} + attrs = {"units": "nanoseconds", "_FillValue": np.int16(-1), "add_offset": 100000.0} encoded = Variable(["time"], np.array([0, -1, 1], "int16"), attrs=attrs) decoded = conventions.decode_cf_variable( "foo", encoded, mask_and_scale=mask_and_scale, decode_timedelta=decode_timedelta