use mean of min/max years as offset in calculation of datetime64 mean (pydata#10035)

* use mean of min/max years as offset in calculation of datetime64 mean

* reinstate _datetime_nanmin as it is used downstream in flox<0.10.0

* add whats-new.rst entry

* add whats-new.rst entry
kmuehlbauer authored Feb 7, 2025
1 parent d57f05c commit df2ecf4
Showing 3 changed files with 28 additions and 5 deletions.
3 changes: 3 additions & 0 deletions doc/whats-new.rst
@@ -39,6 +39,9 @@ Bug fixes
   "nanoseconds" were chosen by default, which are optimal for
   nanosecond-resolution times, but not for times with coarser resolution. By
   `Spencer Clark <https://github.com/spencerkclark>`_ (:pull:`10017`).
+- Use mean of min/max years as offset in calculation of datetime64 mean
+  (:issue:`10019`, :pull:`10035`).
+  By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.


Documentation
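As a rough illustration of the behaviour this whats-new entry describes (a sketch, not part of the commit, assuming the fix is installed): a nanosecond-resolution array spanning 1678 to 2260 covers nearly the full int64 nanosecond range, so anchoring the mean on the minimum value can push intermediate values out of range, while the min/max-year midpoint keeps them small.

import numpy as np
import xarray as xr

da = xr.DataArray(
    np.array(["1678-01-01", "NaT", "2260-01-01"], dtype="datetime64[ns]"),
    dims="time",
)
# Per the test added in this commit, this should evaluate to 1969-01-01.
print(da.mean())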
17 changes: 12 additions & 5 deletions xarray/core/duck_array_ops.py
@@ -550,7 +550,11 @@ def array_any(array, axis=None, keepdims=False, **kwargs):
 
 
 def _datetime_nanmin(array):
-    """nanmin() function for datetime64.
+    return _datetime_nanreduce(array, min)
+
+
+def _datetime_nanreduce(array, func):
+    """nanreduce() function for datetime64.
 
     Caveats that this function deals with:
@@ -562,7 +566,7 @@ def _datetime_nanmin(array):
     assert dtypes.is_datetime_like(dtype)
     # (NaT).astype(float) does not produce NaN...
     array = where(pandas_isnull(array), np.nan, array.astype(float))
-    array = min(array, skipna=True)
+    array = func(array, skipna=True)
     if isinstance(array, float):
         array = np.array(array)
     # ...but (NaN).astype("M8") does produce NaT
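A standalone sketch of the pattern this hunk parametrizes, written against plain numpy rather than xarray's internal helpers (np.nanmin/np.nanmax stand in for the skipna-aware min/max used above):

import numpy as np

def datetime_nanreduce(array, func):
    # Reduce a datetime64 array while skipping NaT: NaT does not become NaN
    # under .astype(float), so mask it explicitly before reducing...
    dtype = array.dtype
    as_float = np.where(np.isnat(array), np.nan, array.astype(float))
    reduced = np.array(func(as_float))
    # ...but NaN does cast back to NaT, so the round trip is safe.
    return reduced.astype(dtype)

arr = np.array(["2000-01-01", "NaT", "2010-01-01"], dtype="datetime64[s]")
print(datetime_nanreduce(arr, np.nanmin))  # 2000-01-01
print(datetime_nanreduce(arr, np.nanmax))  # 2010-01-01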
@@ -597,7 +601,7 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float):
     # Set offset to minimum if not given
     if offset is None:
         if dtypes.is_datetime_like(array.dtype):
-            offset = _datetime_nanmin(array)
+            offset = _datetime_nanreduce(array, min)
         else:
             offset = min(array)
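For context, a rough stand-alone sketch of what the default offset means for the conversion (assumed semantics, plain numpy, not xarray's actual implementation): values become floats measured from the earliest non-NaT time, here in seconds.

import numpy as np

times = np.array(["2021-01-01", "2021-01-02", "NaT"], dtype="datetime64[s]")
as_float = np.where(np.isnat(times), np.nan, times.astype(float))
offset = np.nanmin(as_float)  # earliest non-NaT time, as in the default above
numeric = as_float - offset
print(numeric)                # [0.0, 86400.0, nan]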

@@ -717,8 +721,11 @@ def mean(array, axis=None, skipna=None, **kwargs):
 
     array = asarray(array)
     if dtypes.is_datetime_like(array.dtype):
-        offset = _datetime_nanmin(array)
-
+        dmin = _datetime_nanreduce(array, min).astype("datetime64[Y]").astype(int)
+        dmax = _datetime_nanreduce(array, max).astype("datetime64[Y]").astype(int)
+        offset = (
+            np.array((dmin + dmax) // 2).astype("datetime64[Y]").astype(array.dtype)
+        )
         # From version 2025.01.2 xarray uses np.datetime64[unit], where unit
         # is one of "s", "ms", "us", "ns".
         # To not have to worry about the resolution, we just convert the output
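A small worked example of the offset computation above, using plain numpy and the dates from the new test (a sketch mirroring the added lines, not the xarray code itself): min and max are reduced to integer years relative to 1970, averaged, and the midpoint year is cast back to the array's resolution.

import numpy as np

array = np.array(["1678-01-01", "NaT", "2260-01-01"], dtype="datetime64[ns]")
# Years relative to 1970, as produced by .astype("datetime64[Y]").astype(int)
dmin = np.datetime64("1678-01-01").astype("datetime64[Y]").astype(int)  # -292
dmax = np.datetime64("2260-01-01").astype("datetime64[Y]").astype(int)  #  290
offset = np.array((dmin + dmax) // 2).astype("datetime64[Y]").astype(array.dtype)
# 1969-01-01 (printed at nanosecond resolution): close to the data's centre,
# which keeps the deviations from the offset within int64 range.
print(offset)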
13 changes: 13 additions & 0 deletions xarray/tests/test_duck_array_ops.py
@@ -481,6 +481,19 @@ def test_cftime_datetime_mean(dask):
     assert_equal(result, expected)
 
 
+@pytest.mark.parametrize("dask", [False, True])
+def test_mean_over_long_spanning_datetime64(dask) -> None:
+    if dask and not has_dask:
+        pytest.skip("requires dask")
+    array = np.array(["1678-01-01", "NaT", "2260-01-01"], dtype="datetime64[ns]")
+    da = DataArray(array, dims=["time"])
+    if dask:
+        da = da.chunk({"time": 2})
+    expected = DataArray(np.array("1969-01-01", dtype="datetime64[ns]"))
+    result = da.mean()
+    assert_equal(result, expected)
+
+
 @requires_cftime
 @requires_dask
 def test_mean_over_non_time_dim_of_dataset_with_dask_backed_cftime_data():
