Illviljan · pull · Feb 10, 2025 · Feb 10, 2025 · Feb 10, 2025 · Feb 10, 2025
diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml
@@ -116,7 +116,7 @@ jobs:
           python xarray/util/print_versions.py
       - name: Install mypy
         run: |
-          python -m pip install "mypy==1.13" --force-reinstall
+          python -m pip install "mypy==1.15" --force-reinstall
 
       - name: Run mypy
         run: |
@@ -167,7 +167,7 @@ jobs:
           python xarray/util/print_versions.py
       - name: Install mypy
         run: |
-          python -m pip install "mypy==1.13" --force-reinstall
+          python -m pip install "mypy==1.15" --force-reinstall
 
       - name: Run mypy
         run: |

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -21,7 +21,8 @@ v2025.02.0 (unreleased)
 
 New Features
 ~~~~~~~~~~~~
-
+- Allow kwargs in :py:meth:`DataTree.map_over_datasets` and :py:func:`map_over_datasets` (:issue:`10009`, :pull:`10012`).
+  By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
 
 Breaking changes
 ~~~~~~~~~~~~~~~~

diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
@@ -893,7 +893,7 @@ def _item_key_to_dict(self, key: Any) -> Mapping[Hashable, Any]:
         return dict(zip(self.dims, key, strict=True))
 
     def _getitem_coord(self, key: Any) -> Self:
-        from xarray.core.dataset import _get_virtual_variable
+        from xarray.core.dataset_utils import _get_virtual_variable
 
         try:
             var = self._coords[key]

diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
@@ -24,11 +24,14 @@
 from operator import methodcaller
 from os import PathLike
 from types import EllipsisType
-from typing import IO, TYPE_CHECKING, Any, Generic, Literal, cast, overload
+from typing import IO, TYPE_CHECKING, Any, Literal, cast, overload
 
 import numpy as np
 from pandas.api.types import is_extension_array_dtype
 
+from xarray.core.dataset_utils import _get_virtual_variable, _LocIndexer
+from xarray.core.dataset_variables import DataVariables
+
 # remove once numpy 2.0 is the oldest supported version
 try:
     from numpy.exceptions import RankWarning
@@ -98,7 +101,6 @@
     T_ChunksFreq,
     T_DataArray,
     T_DataArrayOrSet,
-    T_Dataset,
     ZarrWriteModes,
 )
 from xarray.core.utils import (
@@ -196,43 +198,6 @@
 ]
 
 
-def _get_virtual_variable(
-    variables, key: Hashable, dim_sizes: Mapping | None = None
-) -> tuple[Hashable, Hashable, Variable]:
-    """Get a virtual variable (e.g., 'time.year') from a dict of xarray.Variable
-    objects (if possible)
-
-    """
-    from xarray.core.dataarray import DataArray
-
-    if dim_sizes is None:
-        dim_sizes = {}
-
-    if key in dim_sizes:
-        data = pd.Index(range(dim_sizes[key]), name=key)
-        variable = IndexVariable((key,), data)
-        return key, key, variable
-
-    if not isinstance(key, str):
-        raise KeyError(key)
-
-    split_key = key.split(".", 1)
-    if len(split_key) != 2:
-        raise KeyError(key)
-
-    ref_name, var_name = split_key
-    ref_var = variables[ref_name]
-
-    if _contains_datetime_like_objects(ref_var):
-        ref_var = DataArray(ref_var)
-        data = getattr(ref_var.dt, var_name).data
-    else:
-        data = getattr(ref_var, var_name).data
-    virtual_var = Variable(ref_var.dims, data)
-
-    return ref_name, var_name, virtual_var
-
-
 def _get_chunk(var: Variable, chunks, chunkmanager: ChunkManagerEntrypoint):
     """
     Return map from each dim to chunk sizes, accounting for backend's preferred chunks.
@@ -367,19 +332,6 @@ def _maybe_chunk(
         return var
 
 
-def as_dataset(obj: Any) -> Dataset:
-    """Cast the given object to a Dataset.
-
-    Handles Datasets, DataArrays and dictionaries of variables. A new Dataset
-    object is only created if the provided object is not already one.
-    """
-    if hasattr(obj, "to_dataset"):
-        obj = obj.to_dataset()
-    if not isinstance(obj, Dataset):
-        obj = Dataset(obj)
-    return obj
-
-
 def _get_func_args(func, param_names):
     """Use `inspect.signature` to try accessing `func` args. Otherwise, ensure
     they are provided by user.
@@ -468,84 +420,6 @@ def merge_data_and_coords(data_vars: DataVars, coords) -> _MergeResult:
     )
 
 
-class DataVariables(Mapping[Any, "DataArray"]):
-    __slots__ = ("_dataset",)
-
-    def __init__(self, dataset: Dataset):
-        self._dataset = dataset
-
-    def __iter__(self) -> Iterator[Hashable]:
-        return (
-            key
-            for key in self._dataset._variables
-            if key not in self._dataset._coord_names
-        )
-
-    def __len__(self) -> int:
-        length = len(self._dataset._variables) - len(self._dataset._coord_names)
-        assert length >= 0, "something is wrong with Dataset._coord_names"
-        return length
-
-    def __contains__(self, key: Hashable) -> bool:
-        return key in self._dataset._variables and key not in self._dataset._coord_names
-
-    def __getitem__(self, key: Hashable) -> DataArray:
-        if key not in self._dataset._coord_names:
-            return self._dataset[key]
-        raise KeyError(key)
-
-    def __repr__(self) -> str:
-        return formatting.data_vars_repr(self)
-
-    @property
-    def variables(self) -> Mapping[Hashable, Variable]:
-        all_variables = self._dataset.variables
-        return Frozen({k: all_variables[k] for k in self})
-
-    @property
-    def dtypes(self) -> Frozen[Hashable, np.dtype]:
-        """Mapping from data variable names to dtypes.
-
-        Cannot be modified directly, but is updated when adding new variables.
-
-        See Also
-        --------
-        Dataset.dtype
-        """
-        return self._dataset.dtypes
-
-    def _ipython_key_completions_(self):
-        """Provide method for the key-autocompletions in IPython."""
-        return [
-            key
-            for key in self._dataset._ipython_key_completions_()
-            if key not in self._dataset._coord_names
-        ]
-
-
-class _LocIndexer(Generic[T_Dataset]):
-    __slots__ = ("dataset",)
-
-    def __init__(self, dataset: T_Dataset):
-        self.dataset = dataset
-
-    def __getitem__(self, key: Mapping[Any, Any]) -> T_Dataset:
-        if not utils.is_dict_like(key):
-            raise TypeError("can only lookup dictionaries from Dataset.loc")
-        return self.dataset.sel(key)
-
-    def __setitem__(self, key, value) -> None:
-        if not utils.is_dict_like(key):
-            raise TypeError(
-                "can only set locations defined by dictionaries from Dataset.loc."
-                f" Got: {key}"
-            )
-
-        # set new values
-        dim_indexers = map_index_queries(self.dataset, key).dim_indexers
-        self.dataset[dim_indexers] = value
-
-
 class Dataset(
     DataWithCoords,
     DatasetAggregations,

diff --git a/xarray/core/dataset_utils.py b/xarray/core/dataset_utils.py
@@ -0,0 +1,91 @@
+from __future__ import annotations
+
+import typing
+from collections.abc import Hashable, Mapping
+from typing import Any, Generic
+
+import pandas as pd
+
+from xarray.core import utils
+from xarray.core.common import _contains_datetime_like_objects
+from xarray.core.indexing import map_index_queries
+from xarray.core.types import T_Dataset
+from xarray.core.variable import IndexVariable, Variable
+
+if typing.TYPE_CHECKING:
+    from xarray.core.dataset import Dataset
+
+
+class _LocIndexer(Generic[T_Dataset]):
+    __slots__ = ("dataset",)
+
+    def __init__(self, dataset: T_Dataset):
+        self.dataset = dataset
+
+    def __getitem__(self, key: Mapping[Any, Any]) -> T_Dataset:
+        if not utils.is_dict_like(key):
+            raise TypeError("can only lookup dictionaries from Dataset.loc")
+        return self.dataset.sel(key)
+
+    def __setitem__(self, key, value) -> None:
+        if not utils.is_dict_like(key):
+            raise TypeError(
+                "can only set locations defined by dictionaries from Dataset.loc."
+                f" Got: {key}"
+            )
+
+        # set new values
+        dim_indexers = map_index_queries(self.dataset, key).dim_indexers
+        self.dataset[dim_indexers] = value
+
+
+def as_dataset(obj: Any) -> Dataset:
+    """Cast the given object to a Dataset.
+
+    Handles Datasets, DataArrays and dictionaries of variables. A new Dataset
+    object is only created if the provided object is not already one.
+    """
+    from xarray.core.dataset import Dataset
+
+    if hasattr(obj, "to_dataset"):
+        obj = obj.to_dataset()
+    if not isinstance(obj, Dataset):
+        obj = Dataset(obj)
+    return obj
+
+
+def _get_virtual_variable(
+    variables, key: Hashable, dim_sizes: Mapping | None = None
+) -> tuple[Hashable, Hashable, Variable]:
+    """Get a virtual variable (e.g., 'time.year') from a dict of xarray.Variable
+    objects (if possible)
+
+    """
+    from xarray.core.dataarray import DataArray
+
+    if dim_sizes is None:
+        dim_sizes = {}
+
+    if key in dim_sizes:
+        data = pd.Index(range(dim_sizes[key]), name=key)
+        variable = IndexVariable((key,), data)
+        return key, key, variable
+
+    if not isinstance(key, str):
+        raise KeyError(key)
+
+    split_key = key.split(".", 1)
+    if len(split_key) != 2:
+        raise KeyError(key)
+
+    ref_name, var_name = split_key
+    ref_var = variables[ref_name]
+
+    if _contains_datetime_like_objects(ref_var):
+        ref_var = DataArray(ref_var)
+        data = getattr(ref_var.dt, var_name).data
+    else:
+        data = getattr(ref_var, var_name).data
+    virtual_var = Variable(ref_var.dims, data)
+
+    return ref_name, var_name, virtual_var
diff --git a/xarray/core/dataset_variables.py b/xarray/core/dataset_variables.py
@@ -0,0 +1,68 @@
+import typing
+from collections.abc import Hashable, Iterator, Mapping
+from typing import Any
+
+import numpy as np
+
+from xarray.core import formatting
+from xarray.core.utils import Frozen
+from xarray.core.variable import Variable
+
+if typing.TYPE_CHECKING:
+    from xarray.core.dataarray import DataArray
+    from xarray.core.dataset import Dataset
+
+
+class DataVariables(Mapping[Any, "DataArray"]):
+    __slots__ = ("_dataset",)
+
+    def __init__(self, dataset: "Dataset"):
+        self._dataset = dataset
+
+    def __iter__(self) -> Iterator[Hashable]:
+        return (
+            key
+            for key in self._dataset._variables
+            if key not in self._dataset._coord_names
+        )
+
+    def __len__(self) -> int:
+        length = len(self._dataset._variables) - len(self._dataset._coord_names)
+        assert length >= 0, "something is wrong with Dataset._coord_names"
+        return length
+
+    def __contains__(self, key: Hashable) -> bool:
+        return key in self._dataset._variables and key not in self._dataset._coord_names
+
+    def __getitem__(self, key: Hashable) -> "DataArray":
+        if key not in self._dataset._coord_names:
+            return self._dataset[key]
+        raise KeyError(key)
+
+    def __repr__(self) -> str:
+        return formatting.data_vars_repr(self)
+
+    @property
+    def variables(self) -> Mapping[Hashable, Variable]:
+        all_variables = self._dataset.variables
+        return Frozen({k: all_variables[k] for k in self})
+
+    @property
+    def dtypes(self) -> Frozen[Hashable, np.dtype]:
+        """Mapping from data variable names to dtypes.
+
+        Cannot be modified directly, but is updated when adding new variables.
+
+        See Also
+        --------
+        Dataset.dtype
+        """
+        return self._dataset.dtypes
+
+    def _ipython_key_completions_(self):
+        """Provide method for the key-autocompletions in IPython."""
+        return [
+            key
+            for key in self._dataset._ipython_key_completions_()
+            if key not in self._dataset._coord_names
+        ]