Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New defaults for concat, merge, combine_* #10062

Draft
wants to merge 7 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 16 additions & 9 deletions xarray/backends/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
)
from xarray.backends.locks import _get_scheduler
from xarray.coders import CFDatetimeCoder, CFTimedeltaCoder
from xarray.core import indexing
from xarray.core import dtypes, indexing
from xarray.core.combine import (
_infer_concat_order_from_positions,
_nested_combine,
Expand All @@ -49,6 +49,13 @@
from xarray.core.utils import is_remote_uri
from xarray.namedarray.daskmanager import DaskManager
from xarray.namedarray.parallelcompat import guess_chunkmanager
from xarray.util.deprecation_helpers import (
_COMPAT_DEFAULT,
_COORDS_DEFAULT,
_DATA_VARS_DEFAULT,
_JOIN_DEFAULT,
CombineKwargDefault,
)

if TYPE_CHECKING:
try:
Expand Down Expand Up @@ -1402,14 +1409,16 @@ def open_mfdataset(
| Sequence[Index]
| None
) = None,
compat: CompatOptions = "no_conflicts",
compat: CompatOptions | CombineKwargDefault = _COMPAT_DEFAULT,
preprocess: Callable[[Dataset], Dataset] | None = None,
engine: T_Engine | None = None,
data_vars: Literal["all", "minimal", "different"] | list[str] = "all",
coords="different",
data_vars: Literal["all", "minimal", "different"]
| list[str]
| CombineKwargDefault = _DATA_VARS_DEFAULT,
coords=_COORDS_DEFAULT,
combine: Literal["by_coords", "nested"] = "by_coords",
parallel: bool = False,
join: JoinOptions = "outer",
join: JoinOptions | CombineKwargDefault = _JOIN_DEFAULT,
attrs_file: str | os.PathLike | None = None,
combine_attrs: CombineAttrsOptions = "override",
**kwargs,
Expand Down Expand Up @@ -1596,9 +1605,6 @@ def open_mfdataset(

paths1d: list[str | ReadBuffer]
if combine == "nested":
if isinstance(concat_dim, str | DataArray) or concat_dim is None:
concat_dim = [concat_dim] # type: ignore[assignment]

# This creates a flat list which is easier to iterate over, whilst
# encoding the originally-supplied structure as "ids".
# The "ids" are not used at all if combine='by_coords'.
Expand Down Expand Up @@ -1647,13 +1653,14 @@ def open_mfdataset(
# along each dimension, using structure given by "ids"
combined = _nested_combine(
datasets,
concat_dims=concat_dim,
concat_dim=concat_dim,
compat=compat,
data_vars=data_vars,
coords=coords,
ids=ids,
join=join,
combine_attrs=combine_attrs,
fill_value=dtypes.NA,
)
elif combine == "by_coords":
# Redo ordering from coordinates, ignoring how they were ordered
Expand Down
27 changes: 26 additions & 1 deletion xarray/core/alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import functools
import operator
import warnings
from collections import defaultdict
from collections.abc import Callable, Hashable, Iterable, Mapping
from contextlib import suppress
Expand All @@ -22,6 +23,7 @@
from xarray.core.types import T_Alignable
from xarray.core.utils import is_dict_like, is_full_slice
from xarray.core.variable import Variable, as_compatible_data, calculate_dimensions
from xarray.util.deprecation_helpers import CombineKwargDefault

if TYPE_CHECKING:
from xarray.core.dataarray import DataArray
Expand Down Expand Up @@ -418,12 +420,35 @@ def align_indexes(self) -> None:
else:
need_reindex = False
if need_reindex:
if (
isinstance(self.join, CombineKwargDefault)
and self.join != "exact"
):
warnings.warn(
self.join.warning_message(
"This change will result in the following ValueError:"
"cannot be aligned with join='exact' because "
"index/labels/sizes are not equal along "
"these coordinates (dimensions): "
+ ", ".join(
f"{name!r} {dims!r}" for name, dims in key[0]
),
recommend_set_options=False,
),
category=FutureWarning,
stacklevel=2,
)
if self.join == "exact":
raise ValueError(
"cannot align objects with join='exact' where "
"index/labels/sizes are not equal along "
"these coordinates (dimensions): "
+ ", ".join(f"{name!r} {dims!r}" for name, dims in key[0])
+ (
self.join.error_message()
if isinstance(self.join, CombineKwargDefault)
else ""
)
)
joiner = self._get_index_joiner(index_cls)
joined_index = joiner(matching_indexes)
Expand Down Expand Up @@ -886,7 +911,7 @@ def align(

def deep_align(
objects: Iterable[Any],
join: JoinOptions = "inner",
join: JoinOptions | CombineKwargDefault = "inner",
copy: bool = True,
indexes=None,
exclude: str | Iterable[Hashable] = frozenset(),
Expand Down
94 changes: 55 additions & 39 deletions xarray/core/combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@
from xarray.core.dataset import Dataset
from xarray.core.merge import merge
from xarray.core.utils import iterate_nested
from xarray.util.deprecation_helpers import (
_COMPAT_DEFAULT,
_COORDS_DEFAULT,
_DATA_VARS_DEFAULT,
_JOIN_DEFAULT,
CombineKwargDefault,
)

if TYPE_CHECKING:
from xarray.core.types import (
Expand Down Expand Up @@ -200,12 +207,12 @@ def _check_shape_tile_ids(combined_tile_ids):
def _combine_nd(
combined_ids,
concat_dims,
data_vars="all",
coords="different",
compat: CompatOptions = "no_conflicts",
fill_value=dtypes.NA,
join: JoinOptions = "outer",
combine_attrs: CombineAttrsOptions = "drop",
data_vars,
coords,
compat: CompatOptions | CombineKwargDefault,
fill_value,
join: JoinOptions | CombineKwargDefault,
combine_attrs: CombineAttrsOptions,
):
"""
Combines an N-dimensional structure of datasets into one by applying a
Expand Down Expand Up @@ -263,9 +270,9 @@ def _combine_all_along_first_dim(
data_vars,
coords,
compat: CompatOptions,
fill_value=dtypes.NA,
join: JoinOptions = "outer",
combine_attrs: CombineAttrsOptions = "drop",
fill_value,
join: JoinOptions | CombineKwargDefault,
combine_attrs: CombineAttrsOptions,
):
# Group into lines of datasets which must be combined along dim
grouped = groupby_defaultdict(list(combined_ids.items()), key=_new_tile_id)
Expand All @@ -276,20 +283,27 @@ def _combine_all_along_first_dim(
combined_ids = dict(sorted(group))
datasets = combined_ids.values()
new_combined_ids[new_id] = _combine_1d(
datasets, dim, compat, data_vars, coords, fill_value, join, combine_attrs
datasets,
concat_dim=dim,
compat=compat,
data_vars=data_vars,
coords=coords,
fill_value=fill_value,
join=join,
combine_attrs=combine_attrs,
)
return new_combined_ids


def _combine_1d(
datasets,
concat_dim,
compat: CompatOptions = "no_conflicts",
data_vars="all",
coords="different",
fill_value=dtypes.NA,
join: JoinOptions = "outer",
combine_attrs: CombineAttrsOptions = "drop",
compat: CompatOptions,
data_vars,
coords,
fill_value,
join: JoinOptions | CombineKwargDefault,
combine_attrs: CombineAttrsOptions,
):
"""
Applies either concat or merge to 1D list of datasets depending on value
Expand Down Expand Up @@ -338,18 +352,21 @@ def _new_tile_id(single_id_ds_pair):

def _nested_combine(
datasets,
concat_dims,
concat_dim,
compat,
data_vars,
coords,
ids,
fill_value=dtypes.NA,
join: JoinOptions = "outer",
combine_attrs: CombineAttrsOptions = "drop",
fill_value,
join: JoinOptions | CombineKwargDefault,
combine_attrs: CombineAttrsOptions,
):
if len(datasets) == 0:
return Dataset()

if isinstance(concat_dim, str | DataArray) or concat_dim is None:
concat_dim = [concat_dim] # type: ignore[assignment]

# Arrange datasets for concatenation
# Use information from the shape of the user input
if not ids:
Expand All @@ -366,7 +383,7 @@ def _nested_combine(
# Apply series of concatenate or merge operations along each dimension
combined = _combine_nd(
combined_ids,
concat_dims,
concat_dims=concat_dim,
compat=compat,
data_vars=data_vars,
coords=coords,
Expand All @@ -384,11 +401,11 @@ def _nested_combine(
def combine_nested(
datasets: DATASET_HYPERCUBE,
concat_dim: str | DataArray | None | Sequence[str | DataArray | pd.Index | None],
compat: str = "no_conflicts",
data_vars: str = "all",
coords: str = "different",
compat: str | CombineKwargDefault = _COMPAT_DEFAULT,
data_vars: str | CombineKwargDefault = _DATA_VARS_DEFAULT,
coords: str | CombineKwargDefault = _COORDS_DEFAULT,
fill_value: object = dtypes.NA,
join: JoinOptions = "outer",
join: JoinOptions | CombineKwargDefault = _JOIN_DEFAULT,
combine_attrs: CombineAttrsOptions = "drop",
) -> Dataset:
"""
Expand Down Expand Up @@ -581,13 +598,10 @@ def combine_nested(
if mixed_datasets_and_arrays:
raise ValueError("Can't combine datasets with unnamed arrays.")

if isinstance(concat_dim, str | DataArray) or concat_dim is None:
concat_dim = [concat_dim]

# The IDs argument tells _nested_combine that datasets aren't yet sorted
return _nested_combine(
datasets,
concat_dims=concat_dim,
concat_dim=concat_dim,
compat=compat,
data_vars=data_vars,
coords=coords,
Expand Down Expand Up @@ -619,12 +633,12 @@ def groupby_defaultdict(

def _combine_single_variable_hypercube(
datasets,
fill_value=dtypes.NA,
data_vars="all",
coords="different",
compat: CompatOptions = "no_conflicts",
join: JoinOptions = "outer",
combine_attrs: CombineAttrsOptions = "no_conflicts",
fill_value,
data_vars,
coords,
compat: CompatOptions | CombineKwargDefault,
join: JoinOptions | CombineKwargDefault,
combine_attrs: CombineAttrsOptions,
):
"""
Attempt to combine a list of Datasets into a hypercube using their
Expand Down Expand Up @@ -678,11 +692,13 @@ def _combine_single_variable_hypercube(

def combine_by_coords(
data_objects: Iterable[Dataset | DataArray] = [],
compat: CompatOptions = "no_conflicts",
data_vars: Literal["all", "minimal", "different"] | list[str] = "all",
coords: str = "different",
compat: CompatOptions | CombineKwargDefault = _COMPAT_DEFAULT,
data_vars: Literal["all", "minimal", "different"]
| list[str]
| CombineKwargDefault = _DATA_VARS_DEFAULT,
coords: str | CombineKwargDefault = _COORDS_DEFAULT,
fill_value: object = dtypes.NA,
join: JoinOptions = "outer",
join: JoinOptions | CombineKwargDefault = _JOIN_DEFAULT,
combine_attrs: CombineAttrsOptions = "no_conflicts",
) -> Dataset | DataArray:
"""
Expand Down
Loading
Loading