From 5461a9ff27bc9ece7d13e3d45af66130473073af Mon Sep 17 00:00:00 2001
From: Julia Signell
Date: Mon, 24 Feb 2025 15:04:16 -0500
Subject: [PATCH] Use sentinel value to change default with warnings

---
 xarray/backends/api.py             |  22 +++--
 xarray/core/alignment.py           |  30 ++++++-
 xarray/core/combine.py             |  51 ++++++-----
 xarray/core/concat.py              | 140 +++++++++++++++++++++++------
 xarray/core/dataset.py             |  22 ++++-
 xarray/core/groupby.py             |  18 +++-
 xarray/core/merge.py               |  67 ++++++++++----
 xarray/core/options.py             |   6 ++
 xarray/plot/dataarray_plot.py      |   9 +-
 xarray/util/deprecation_helpers.py |  61 ++++++++++++-
 10 files changed, 344 insertions(+), 82 deletions(-)

diff --git a/xarray/backends/api.py b/xarray/backends/api.py
index ba048750bce..1da89ff9a82 100644
--- a/xarray/backends/api.py
+++ b/xarray/backends/api.py
@@ -49,6 +49,13 @@
 from xarray.core.utils import is_remote_uri
 from xarray.namedarray.daskmanager import DaskManager
 from xarray.namedarray.parallelcompat import guess_chunkmanager
+from xarray.util.deprecation_helpers import (
+    _COMPAT_DEFAULT,
+    _COORDS_DEFAULT,
+    _DATA_VARS_DEFAULT,
+    _JOIN_DEFAULT,
+    CombineKwargDefault,
+)
 
 if TYPE_CHECKING:
     try:
@@ -1402,14 +1409,16 @@ def open_mfdataset(
         | Sequence[Index]
         | None
     ) = None,
-    compat: CompatOptions = "no_conflicts",
+    compat: CompatOptions | CombineKwargDefault = _COMPAT_DEFAULT,
     preprocess: Callable[[Dataset], Dataset] | None = None,
     engine: T_Engine | None = None,
-    data_vars: Literal["all", "minimal", "different"] | list[str] = "all",
-    coords="different",
+    data_vars: Literal["all", "minimal", "different"]
+    | list[str]
+    | CombineKwargDefault = _DATA_VARS_DEFAULT,
+    coords=_COORDS_DEFAULT,
     combine: Literal["by_coords", "nested"] = "by_coords",
     parallel: bool = False,
-    join: JoinOptions = "outer",
+    join: JoinOptions | CombineKwargDefault = _JOIN_DEFAULT,
     attrs_file: str | os.PathLike | None = None,
     combine_attrs: CombineAttrsOptions = "override",
     **kwargs,
@@ -1596,9 +1605,6 @@ def open_mfdataset(
 
     paths1d: list[str | ReadBuffer]
     if combine == "nested":
-        if isinstance(concat_dim, str | DataArray) or concat_dim is None:
-            concat_dim = [concat_dim]  # type: ignore[assignment]
-
         # This creates a flat list which is easier to iterate over, whilst
         # encoding the originally-supplied structure as "ids".
        # The "ids" are not used at all if combine='by_coords`.
@@ -1647,7 +1653,7 @@ def open_mfdataset(
                 # along each dimension, using structure given by "ids"
                 combined = _nested_combine(
                     datasets,
-                    concat_dims=concat_dim,
+                    concat_dim=concat_dim,
                     compat=compat,
                     data_vars=data_vars,
                     coords=coords,
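
To make the intended user-facing behavior concrete, here is an illustrative
sketch (not part of the patch; the file names are hypothetical) of how the
open_mfdataset sentinel defaults above resolve:

    import xarray as xr

    paths = ["part1.nc", "part2.nc"]  # hypothetical input files

    # Old defaults still apply (join="outer", compat="no_conflicts", ...), but
    # a FutureWarning is emitted where the new defaults would change the result.
    ds_old = xr.open_mfdataset(paths)

    # Opt in to the new defaults (join="exact", compat="override",
    # data_vars="minimal", coords="minimal") ahead of the switch.
    with xr.set_options(use_new_combine_kwarg_defaults=True):
        ds_new = xr.open_mfdataset(paths)

    # Passing the kwargs explicitly behaves the same before and after the
    # default change, and silences the warnings.
    ds_explicit = xr.open_mfdataset(paths, join="outer", compat="no_conflicts")
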
diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py
index d6cdd45bb49..b2114155cbd 100644
--- a/xarray/core/alignment.py
+++ b/xarray/core/alignment.py
@@ -2,6 +2,7 @@
 
 import functools
 import operator
+import warnings
 from collections import defaultdict
 from collections.abc import Callable, Hashable, Iterable, Mapping
 from contextlib import suppress
@@ -22,6 +23,7 @@
 from xarray.core.types import T_Alignable
 from xarray.core.utils import is_dict_like, is_full_slice
 from xarray.core.variable import Variable, as_compatible_data, calculate_dimensions
+from xarray.util.deprecation_helpers import CombineKwargDefault
 
 if TYPE_CHECKING:
     from xarray.core.dataarray import DataArray
@@ -418,12 +420,40 @@ def align_indexes(self) -> None:
             else:
                 need_reindex = False
             if need_reindex:
+                if (
+                    isinstance(self.join, CombineKwargDefault)
+                    and self.join != "exact"
+                ):
+                    warnings.warn(
+                        self.join.warning_message(
+                            "This change will result in the following ValueError: "
+                            "cannot be aligned with join='exact' because "
+                            "index/labels/sizes are not equal along "
+                            "these coordinates (dimensions): "
+                            + ", ".join(
+                                f"{name!r} {dims!r}" for name, dims in key[0]
+                            ),
+                            recommend_set_options=False,
+                        ),
+                        category=FutureWarning,
+                        stacklevel=2,
+                    )
                 if self.join == "exact":
+                    new_default_warning = (
+                        " Failure might be related to new default (join='exact'). "
+                        "Previously the default was join='outer'. "
+                        "The recommendation is to set join explicitly for this case."
+                    )
                     raise ValueError(
                         "cannot align objects with join='exact' where "
                         "index/labels/sizes are not equal along "
                         "these coordinates (dimensions): "
                         + ", ".join(f"{name!r} {dims!r}" for name, dims in key[0])
+                        + (
+                            new_default_warning
+                            if isinstance(self.join, CombineKwargDefault)
+                            else ""
+                        )
                     )
             joiner = self._get_index_joiner(index_cls)
             joined_index = joiner(matching_indexes)
@@ -886,7 +914,7 @@ def align(
 
 def deep_align(
     objects: Iterable[Any],
-    join: JoinOptions = "inner",
+    join: JoinOptions | CombineKwargDefault = "inner",
     copy: bool = True,
     indexes=None,
     exclude: str | Iterable[Hashable] = frozenset(),
diff --git a/xarray/core/combine.py b/xarray/core/combine.py
index 33f477a28ce..b9a0d9f614a 100644
--- a/xarray/core/combine.py
+++ b/xarray/core/combine.py
@@ -12,6 +12,13 @@
 from xarray.core.dataset import Dataset
 from xarray.core.merge import merge
 from xarray.core.utils import iterate_nested
+from xarray.util.deprecation_helpers import (
+    _COMPAT_DEFAULT,
+    _COORDS_DEFAULT,
+    _DATA_VARS_DEFAULT,
+    _JOIN_DEFAULT,
+    CombineKwargDefault,
+)
 
 if TYPE_CHECKING:
     from xarray.core.types import (
@@ -202,9 +209,9 @@ def _combine_nd(
     concat_dims,
     data_vars,
     coords,
-    compat: CompatOptions,
+    compat: CompatOptions | CombineKwargDefault,
     fill_value,
-    join: JoinOptions,
+    join: JoinOptions | CombineKwargDefault,
     combine_attrs: CombineAttrsOptions,
 ):
     """
@@ -264,7 +271,7 @@ def _combine_all_along_first_dim(
     coords,
     compat: CompatOptions,
     fill_value,
-    join: JoinOptions,
+    join: JoinOptions | CombineKwargDefault,
     combine_attrs: CombineAttrsOptions,
 ):
     # Group into lines of datasets which must be combined along dim
@@ -295,7 +302,7 @@ def _combine_1d(
     data_vars,
     coords,
     fill_value,
-    join: JoinOptions,
+    join: JoinOptions | CombineKwargDefault,
     combine_attrs: CombineAttrsOptions,
 ):
     """
@@ -345,18 +352,21 @@ def _new_tile_id(single_id_ds_pair):
 
 def _nested_combine(
     datasets,
-    concat_dims,
+    concat_dim,
     compat,
     data_vars,
     coords,
     ids,
     fill_value,
-    join: JoinOptions,
+    join: JoinOptions | CombineKwargDefault,
     combine_attrs: CombineAttrsOptions,
 ):
     if len(datasets) == 0:
         return Dataset()
 
+    if isinstance(concat_dim, str | DataArray) or concat_dim is None:
+        concat_dim = [concat_dim]  # type: ignore[assignment]
+
     # Arrange datasets for concatenation
     # Use information from the shape of the user input
     if not ids:
@@ -373,7 +383,7 @@ def _nested_combine(
     # Apply series of concatenate or merge operations along each dimension
     combined = _combine_nd(
         combined_ids,
-        concat_dims,
+        concat_dims=concat_dim,
         compat=compat,
         data_vars=data_vars,
         coords=coords,
@@ -391,11 +401,11 @@
 def combine_nested(
     datasets: DATASET_HYPERCUBE,
     concat_dim: str | DataArray | None | Sequence[str | DataArray | pd.Index | None],
-    compat: str = "no_conflicts",
-    data_vars: str = "all",
-    coords: str = "different",
+    compat: str | CombineKwargDefault = _COMPAT_DEFAULT,
+    data_vars: str | CombineKwargDefault = _DATA_VARS_DEFAULT,
+    coords: str | CombineKwargDefault = _COORDS_DEFAULT,
     fill_value: object = dtypes.NA,
-    join: JoinOptions = "outer",
+    join: JoinOptions | CombineKwargDefault = _JOIN_DEFAULT,
     combine_attrs: CombineAttrsOptions = "drop",
 ) -> Dataset:
     """
@@ -588,13 +598,10 @@ def combine_nested(
     if mixed_datasets_and_arrays:
         raise ValueError("Can't combine datasets with unnamed arrays.")
 
-    if isinstance(concat_dim, str | DataArray) or concat_dim is None:
-        concat_dim = [concat_dim]
-
     # The IDs argument tells _nested_combine that datasets aren't yet sorted
     return _nested_combine(
         datasets,
-        concat_dims=concat_dim,
+        concat_dim=concat_dim,
         compat=compat,
         data_vars=data_vars,
         coords=coords,
@@ -629,8 +636,8 @@ def _combine_single_variable_hypercube(
     fill_value,
     data_vars,
     coords,
-    compat: CompatOptions,
-    join: JoinOptions,
+    compat: CompatOptions | CombineKwargDefault,
+    join: JoinOptions | CombineKwargDefault,
     combine_attrs: CombineAttrsOptions,
 ):
     """
@@ -685,11 +692,13 @@ def _combine_single_variable_hypercube(
 
 def combine_by_coords(
     data_objects: Iterable[Dataset | DataArray] = [],
-    compat: CompatOptions = "no_conflicts",
-    data_vars: Literal["all", "minimal", "different"] | list[str] = "all",
-    coords: str = "different",
+    compat: CompatOptions | CombineKwargDefault = _COMPAT_DEFAULT,
+    data_vars: Literal["all", "minimal", "different"]
+    | list[str]
+    | CombineKwargDefault = _DATA_VARS_DEFAULT,
+    coords: str | CombineKwargDefault = _COORDS_DEFAULT,
     fill_value: object = dtypes.NA,
-    join: JoinOptions = "outer",
+    join: JoinOptions | CombineKwargDefault = _JOIN_DEFAULT,
     combine_attrs: CombineAttrsOptions = "no_conflicts",
 ) -> Dataset | DataArray:
     """
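
The combine_nested/combine_by_coords changes follow the same pattern as
open_mfdataset. An illustrative sketch (toy data, not part of the patch) of
the forward-compatible calling style:

    import xarray as xr

    ds0 = xr.Dataset({"a": ("x", [1, 2])}, coords={"x": [0, 1]})
    ds1 = xr.Dataset({"a": ("x", [3, 4])}, coords={"x": [2, 3]})

    # Spelling out the kwargs that were previously implicit defaults keeps the
    # result stable across the deprecation cycle and avoids the FutureWarnings.
    combined = xr.combine_nested(
        [ds0, ds1],
        concat_dim="x",
        data_vars="all",
        coords="different",
        compat="no_conflicts",
        join="outer",
    )
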
diff --git a/xarray/core/concat.py b/xarray/core/concat.py
index a0ea72a7142..c8776baa934 100644
--- a/xarray/core/concat.py
+++ b/xarray/core/concat.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import warnings
 from collections.abc import Hashable, Iterable
 from typing import TYPE_CHECKING, Any, Union, overload
 
@@ -20,6 +21,13 @@
 from xarray.core.types import T_DataArray, T_Dataset, T_Variable
 from xarray.core.variable import Variable
 from xarray.core.variable import concat as concat_vars
+from xarray.util.deprecation_helpers import (
+    _COMPAT_CONCAT_DEFAULT,
+    _COORDS_DEFAULT,
+    _DATA_VARS_DEFAULT,
+    _JOIN_DEFAULT,
+    CombineKwargDefault,
+)
 
 if TYPE_CHECKING:
     from xarray.core.types import (
@@ -37,12 +45,12 @@
 def concat(
     objs: Iterable[T_Dataset],
     dim: Hashable | T_Variable | T_DataArray | pd.Index | Any,
-    data_vars: T_DataVars = "all",
-    coords: ConcatOptions | list[Hashable] = "different",
-    compat: CompatOptions = "equals",
+    data_vars: T_DataVars | CombineKwargDefault = _DATA_VARS_DEFAULT,
+    coords: ConcatOptions | list[Hashable] | CombineKwargDefault = _COORDS_DEFAULT,
+    compat: CompatOptions | CombineKwargDefault = _COMPAT_CONCAT_DEFAULT,
     positions: Iterable[Iterable[int]] | None = None,
     fill_value: object = dtypes.NA,
-    join: JoinOptions = "outer",
+    join: JoinOptions | CombineKwargDefault = _JOIN_DEFAULT,
     combine_attrs: CombineAttrsOptions = "override",
     create_index_for_new_dim: bool = True,
 ) -> T_Dataset: ...
@@ -52,12 +60,12 @@
 def concat(
     objs: Iterable[T_DataArray],
     dim: Hashable | T_Variable | T_DataArray | pd.Index | Any,
-    data_vars: T_DataVars = "all",
-    coords: ConcatOptions | list[Hashable] = "different",
-    compat: CompatOptions = "equals",
+    data_vars: T_DataVars | CombineKwargDefault = _DATA_VARS_DEFAULT,
+    coords: ConcatOptions | list[Hashable] | CombineKwargDefault = _COORDS_DEFAULT,
+    compat: CompatOptions | CombineKwargDefault = _COMPAT_CONCAT_DEFAULT,
     positions: Iterable[Iterable[int]] | None = None,
     fill_value: object = dtypes.NA,
-    join: JoinOptions = "outer",
+    join: JoinOptions | CombineKwargDefault = _JOIN_DEFAULT,
     combine_attrs: CombineAttrsOptions = "override",
     create_index_for_new_dim: bool = True,
 ) -> T_DataArray: ...
@@ -66,12 +74,12 @@
 def concat(
     objs,
     dim,
-    data_vars: T_DataVars = "all",
-    coords="different",
-    compat: CompatOptions = "equals",
+    data_vars: T_DataVars | CombineKwargDefault = _DATA_VARS_DEFAULT,
+    coords: ConcatOptions | list[Hashable] | CombineKwargDefault = _COORDS_DEFAULT,
+    compat: CompatOptions | CombineKwargDefault = _COMPAT_CONCAT_DEFAULT,
     positions=None,
     fill_value=dtypes.NA,
-    join: JoinOptions = "outer",
+    join: JoinOptions | CombineKwargDefault = _JOIN_DEFAULT,
     combine_attrs: CombineAttrsOptions = "override",
     create_index_for_new_dim: bool = True,
 ):
@@ -255,7 +263,9 @@ def concat(
     except StopIteration as err:
         raise ValueError("must supply at least one object to concatenate") from err
 
-    if compat not in set(_VALID_COMPAT) - {"minimal"}:
+    if not isinstance(compat, CombineKwargDefault) and compat not in set(
+        _VALID_COMPAT
+    ) - {"minimal"}:
         raise ValueError(
             f"compat={compat!r} invalid: must be 'broadcast_equals', 'equals', 'identical', 'no_conflicts' or 'override'"
         )
@@ -320,7 +330,14 @@ def _calc_concat_dim_index(
     return dim, index
 
 
-def _calc_concat_over(datasets, dim, dim_names, data_vars: T_DataVars, coords, compat):
+def _calc_concat_over(
+    datasets,
+    dim,
+    dim_names,
+    data_vars: T_DataVars,
+    coords,
+    compat,
+):
     """
     Determine which dataset variables need to be concatenated in the result,
     """
@@ -344,11 +361,39 @@ def _calc_concat_over(
         concat_dim_lengths.append(ds.sizes.get(dim, 1))
 
     def process_subset_opt(opt, subset):
-        if isinstance(opt, str):
+        original = set(concat_over)
+        compat_str = (
+            compat._value if isinstance(compat, CombineKwargDefault) else compat
+        )
+        if isinstance(opt, str | CombineKwargDefault):
             if opt == "different":
+                if isinstance(compat, CombineKwargDefault) and compat != "override":
+                    if subset == "data_vars" or not isinstance(
+                        opt, CombineKwargDefault
+                    ):
+                        warnings.warn(
+                            compat.warning_message(
+                                "This change will result in the following ValueError: "
+                                f"Cannot specify both {subset}='different' and compat='override'.",
+                                recommend_set_options=False,
+                            ),
+                            category=FutureWarning,
+                            stacklevel=2,
+                        )
+
                 if compat == "override":
+                    new_default_warning = (
+                        " Failure might be related to new default (compat='override'). "
+                        "Previously the default was compat='equals' or compat='no_conflicts'. "
+                        "The recommendation is to set compat explicitly for this case."
+                    )
                     raise ValueError(
                         f"Cannot specify both {subset}='different' and compat='override'."
+                        + (
+                            new_default_warning
+                            if isinstance(compat, CombineKwargDefault)
+                            else ""
+                        )
                     )
                 # all nonindexes that are not the same in each dataset
                 for k in getattr(datasets[0], subset):
@@ -372,7 +415,7 @@ def process_subset_opt(opt, subset):
 
                         # first check without comparing values i.e. no computes
                         for var in variables[1:]:
-                            equals[k] = getattr(variables[0], compat)(
+                            equals[k] = getattr(variables[0], compat_str)(
                                 var, equiv=lazy_array_equiv
                             )
                             if equals[k] is not True:
@@ -395,7 +438,7 @@ def process_subset_opt(opt, subset):
                         for ds_rhs in datasets[1:]:
                             v_rhs = ds_rhs.variables[k].compute()
                             computed.append(v_rhs)
-                            if not getattr(v_lhs, compat)(v_rhs):
+                            if not getattr(v_lhs, compat_str)(v_rhs):
                                 concat_over.add(k)
                                 equals[k] = False
                                 # computed variables are not to be re-computed
@@ -418,6 +461,20 @@ def process_subset_opt(opt, subset):
                 pass
             else:
                 raise ValueError(f"unexpected value for {subset}: {opt}")
+
+            if (
+                isinstance(opt, CombineKwargDefault)
+                and opt != "minimal"
+                and original != concat_over
+            ):
+                warnings.warn(
+                    opt.warning_message(
+                        "This is likely to lead to different results when multiple datasets "
+                        "have matching variables with overlapping values.",
+                    ),
+                    category=FutureWarning,
+                    stacklevel=2,
+                )
         else:
             valid_vars = tuple(getattr(datasets[0], subset))
             invalid_vars = [k for k in opt if k not in valid_vars]
@@ -479,14 +536,15 @@ def _parse_datasets(
 def _dataset_concat(
     datasets: Iterable[T_Dataset],
     dim: str | T_Variable | T_DataArray | pd.Index,
-    data_vars: T_DataVars,
-    coords: str | list[str],
-    compat: CompatOptions,
+    data_vars: T_DataVars | CombineKwargDefault,
+    coords: str | list[str] | CombineKwargDefault,
+    compat: CompatOptions | CombineKwargDefault,
     positions: Iterable[Iterable[int]] | None,
     fill_value: Any,
-    join: JoinOptions,
+    join: JoinOptions | CombineKwargDefault,
     combine_attrs: CombineAttrsOptions,
     create_index_for_new_dim: bool,
+    warn_about_data_vars: bool = True,
 ) -> T_Dataset:
     """
     Concatenate a sequence of datasets along a new or existing dimension
@@ -501,6 +559,35 @@ def _dataset_concat(
             "The elements in the input list need to be either all 'Dataset's or all 'DataArray's"
         )
 
+    if not isinstance(compat, CombineKwargDefault) and compat not in set(
+        _VALID_COMPAT
+    ) - {"minimal"}:
+        raise ValueError(
+            f"compat={compat!r} invalid: must be 'broadcast_equals', 'equals', 'identical', 'no_conflicts' or 'override'"
+        )
+
+    if (
+        warn_about_data_vars
+        and isinstance(data_vars, CombineKwargDefault)
+        and data_vars == "all"
+    ):
+        if not isinstance(dim, str):
+            warnings.warn(
+                data_vars.warning_message(
+                    "This is likely to lead to different results when using an object as the concat_dim.",
+                ),
+                category=FutureWarning,
+                stacklevel=2,
+            )
+        elif dim is not None and all(dim not in ds for ds in datasets):
+            warnings.warn(
+                data_vars.warning_message(
+                    "This is likely to lead to different results when constructing a new dimension.",
+                ),
+                category=FutureWarning,
+                stacklevel=2,
+            )
+
     if isinstance(dim, DataArray):
         dim_var = dim.variable
     elif isinstance(dim, Variable):
@@ -718,12 +805,12 @@ def get_indexes(name):
 def _dataarray_concat(
     arrays: Iterable[T_DataArray],
     dim: str | T_Variable | T_DataArray | pd.Index,
-    data_vars: T_DataVars,
-    coords: str | list[str],
-    compat: CompatOptions,
+    data_vars: T_DataVars | CombineKwargDefault,
+    coords: str | list[str] | CombineKwargDefault,
+    compat: CompatOptions | CombineKwargDefault,
     positions: Iterable[Iterable[int]] | None,
     fill_value: object,
-    join: JoinOptions,
+    join: JoinOptions | CombineKwargDefault,
     combine_attrs: CombineAttrsOptions,
     create_index_for_new_dim: bool,
 ) -> T_DataArray:
@@ -736,7 +823,7 @@ def _dataarray_concat(
             "The elements in the input list need to be either all 'Dataset's or all 'DataArray's"
         )
 
-    if data_vars != "all":
+    if not isinstance(data_vars, CombineKwargDefault) and data_vars != "all":
         raise ValueError(
             "data_vars is not a valid argument when concatenating DataArray objects"
         )
@@ -763,6 +850,7 @@ def _dataarray_concat(
         join=join,
         combine_attrs=combine_attrs,
         create_index_for_new_dim=create_index_for_new_dim,
+        warn_about_data_vars=False,
     )
 
     merged_attrs = merge_attrs([da.attrs for da in arrays], combine_attrs)
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 449f502c43a..af37b1bb3f2 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -132,7 +132,13 @@
 from xarray.namedarray.parallelcompat import get_chunked_array_type, guess_chunkmanager
 from xarray.namedarray.pycompat import array_type, is_chunked_array, to_numpy
 from xarray.plot.accessor import DatasetPlotAccessor
-from xarray.util.deprecation_helpers import _deprecate_positional_args, deprecate_dims
+from xarray.util.deprecation_helpers import (
+    _COMPAT_DEFAULT,
+    _JOIN_DEFAULT,
+    CombineKwargDefault,
+    _deprecate_positional_args,
+    deprecate_dims,
+)
 
 if TYPE_CHECKING:
     from dask.dataframe import DataFrame as DaskDataFrame
@@ -413,6 +419,7 @@ def merge_data_and_coords(data_vars: DataVars, coords) -> _MergeResult:
         [data_vars, coords],
         compat="broadcast_equals",
         join="outer",
+        combine_attrs="override",
         explicit_coords=tuple(coords),
         indexes=coords.xindexes,
         priority_arg=1,
@@ -5506,7 +5513,14 @@ def stack_dataarray(da):
 
         # concatenate the arrays
         stackable_vars = [stack_dataarray(da) for da in self.data_vars.values()]
-        data_array = concat(stackable_vars, dim=new_dim)
+        data_array = concat(
+            stackable_vars,
+            dim=new_dim,
+            data_vars="all",
+            coords="different",
+            compat="equals",
+            join="outer",
+        )
 
         if name is not None:
             data_array.name = name
@@ -5750,8 +5764,8 @@ def merge(
         self,
         other: CoercibleMapping | DataArray,
         overwrite_vars: Hashable | Iterable[Hashable] = frozenset(),
-        compat: CompatOptions = "no_conflicts",
-        join: JoinOptions = "outer",
+        compat: CompatOptions | CombineKwargDefault = _COMPAT_DEFAULT,
+        join: JoinOptions | CombineKwargDefault = _JOIN_DEFAULT,
         fill_value: Any = xrdtypes.NA,
         combine_attrs: CombineAttrsOptions = "override",
     ) -> Self:
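
Dataset.merge picks up the new sentinels as well, while the internal concat
call sites above pin the old values. An illustrative sketch (toy data, not
part of the patch) of what changes for merge:

    import xarray as xr

    ds0 = xr.Dataset({"a": ("x", [1, 2])}, coords={"x": [0, 1]})
    ds1 = xr.Dataset({"b": ("x", [3, 4])}, coords={"x": [1, 2]})

    # Old default join="outer": the x indexes are unioned to [0, 1, 2], and a
    # FutureWarning points at the upcoming join="exact" default.
    merged = ds0.merge(ds1)

    # New default join="exact": the same call raises, and the ValueError text
    # carries the new-default hint added in alignment.py above.
    with xr.set_options(use_new_combine_kwarg_defaults=True):
        try:
            ds0.merge(ds1)
        except ValueError as err:
            print(err)
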
diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py
index b28ba390a9f..9a1827c4eb8 100644
--- a/xarray/core/groupby.py
+++ b/xarray/core/groupby.py
@@ -1526,7 +1526,14 @@ def _combine(self, applied, shortcut=False):
         if shortcut:
             combined = self._concat_shortcut(applied, dim, positions)
         else:
-            combined = concat(applied, dim)
+            combined = concat(
+                applied,
+                dim,
+                data_vars="all",
+                coords="different",
+                compat="equals",
+                join="outer",
+            )
         combined = _maybe_reorder(combined, dim, positions, N=self.group1d.size)
 
         if isinstance(combined, type(self._obj)):
@@ -1686,7 +1693,14 @@ def _combine(self, applied):
         """Recombine the applied objects like the original."""
         applied_example, applied = peek_at(applied)
         dim, positions = self._infer_concat_args(applied_example)
-        combined = concat(applied, dim)
+        combined = concat(
+            applied,
+            dim,
+            data_vars="all",
+            coords="different",
+            compat="equals",
+            join="outer",
+        )
         combined = _maybe_reorder(combined, dim, positions, N=self.group1d.size)
         # assign coord when the applied function does not return that coord
         if dim not in applied_example.dims:
diff --git a/xarray/core/merge.py b/xarray/core/merge.py
index 6426f741750..4a4100cde13 100644
--- a/xarray/core/merge.py
+++ b/xarray/core/merge.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import warnings
 from collections import defaultdict
 from collections.abc import Hashable, Iterable, Mapping, Sequence, Set
 from typing import TYPE_CHECKING, Any, NamedTuple, Union
@@ -17,6 +18,11 @@
 )
 from xarray.core.utils import Frozen, compat_dict_union, dict_equiv, equivalent
 from xarray.core.variable import Variable, as_variable, calculate_dimensions
+from xarray.util.deprecation_helpers import (
+    _COMPAT_DEFAULT,
+    _JOIN_DEFAULT,
+    CombineKwargDefault,
+)
 
 if TYPE_CHECKING:
     from xarray.core.coordinates import Coordinates
@@ -83,7 +89,7 @@ class MergeError(ValueError):
 def unique_variable(
     name: Hashable,
     variables: list[Variable],
-    compat: CompatOptions = "broadcast_equals",
+    compat: CompatOptions | CombineKwargDefault = "broadcast_equals",
     equals: bool | None = None,
 ) -> Variable:
     """Return the unique variable from a list of variables or raise MergeError.
@@ -126,9 +132,12 @@ def unique_variable(
         combine_method = "fillna"
 
     if equals is None:
+        compat_str = (
+            compat._value if isinstance(compat, CombineKwargDefault) else compat
+        )
         # first check without comparing values i.e. no computes
         for var in variables[1:]:
-            equals = getattr(out, compat)(var, equiv=lazy_array_equiv)
+            equals = getattr(out, compat_str)(var, equiv=lazy_array_equiv)
             if equals is not True:
                 break
 
@@ -136,7 +145,7 @@ def unique_variable(
 
         # now compare values with minimum number of computes
         out = out.compute()
         for var in variables[1:]:
-            equals = getattr(out, compat)(var)
+            equals = getattr(out, compat_str)(var)
             if not equals:
                 break
@@ -154,7 +163,7 @@ def unique_variable(
     return out
 
 
 def _assert_compat_valid(compat):
-    if compat not in _VALID_COMPAT:
+    if not isinstance(compat, CombineKwargDefault) and compat not in _VALID_COMPAT:
         raise ValueError(f"compat={compat!r} invalid: must be {set(_VALID_COMPAT)}")
 
@@ -196,7 +205,7 @@ def _assert_prioritized_valid(
 def merge_collected(
     grouped: dict[Any, list[MergeElement]],
     prioritized: Mapping[Any, MergeElement] | None = None,
-    compat: CompatOptions = "minimal",
+    compat: CompatOptions | CombineKwargDefault = "minimal",
     combine_attrs: CombineAttrsOptions = "override",
     equals: dict[Any, bool] | None = None,
 ) -> tuple[dict[Hashable, Variable], dict[Hashable, Index]]:
@@ -290,6 +299,21 @@ def merge_collected(
                     merged_vars[name] = unique_variable(
                         name, variables, compat, equals.get(name, None)
                     )
+                    # This is very likely to result in false positives, but there is no way
+                    # to tell if the output will change without computing.
+                    if (
+                        isinstance(compat, CombineKwargDefault)
+                        and compat == "no_conflicts"
+                        and len(variables) > 1
+                    ):
+                        warnings.warn(
+                            compat.warning_message(
+                                "This is likely to lead to different results when "
+                                "combining overlapping variables with the same name.",
+                            ),
+                            category=FutureWarning,
+                            stacklevel=2,
+                        )
                 except MergeError:
                     if compat != "minimal":
                         # we need more than "minimal" compatibility (for which
@@ -626,8 +650,8 @@ class _MergeResult(NamedTuple):
 
 def merge_core(
     objects: Iterable[CoercibleMapping],
-    compat: CompatOptions = "broadcast_equals",
-    join: JoinOptions = "outer",
+    compat: CompatOptions | CombineKwargDefault,
+    join: JoinOptions | CombineKwargDefault,
     combine_attrs: CombineAttrsOptions = "override",
     priority_arg: int | None = None,
     explicit_coords: Iterable[Hashable] | None = None,
@@ -690,7 +714,11 @@ def merge_core(
 
     coerced = coerce_pandas_values(objects)
     aligned = deep_align(
-        coerced, join=join, copy=False, indexes=indexes, fill_value=fill_value
+        coerced,
+        join=join,
+        copy=False,
+        indexes=indexes,
+        fill_value=fill_value,
    )
 
     for pos, obj in skip_align_objs:
@@ -699,7 +727,10 @@ def merge_core(
     collected = collect_variables_and_indexes(aligned, indexes=indexes)
     prioritized = _get_priority_vars_and_indexes(aligned, priority_arg, compat=compat)
     variables, out_indexes = merge_collected(
-        collected, prioritized, compat=compat, combine_attrs=combine_attrs
+        collected,
+        prioritized,
+        compat=compat,
+        combine_attrs=combine_attrs,
     )
 
     dims = calculate_dimensions(variables)
@@ -730,8 +761,8 @@ def merge_core(
 
 def merge(
     objects: Iterable[DataArray | CoercibleMapping],
-    compat: CompatOptions = "no_conflicts",
-    join: JoinOptions = "outer",
+    compat: CompatOptions | CombineKwargDefault = _COMPAT_DEFAULT,
+    join: JoinOptions | CombineKwargDefault = _JOIN_DEFAULT,
     fill_value: object = dtypes.NA,
     combine_attrs: CombineAttrsOptions = "override",
 ) -> Dataset:
@@ -975,8 +1006,8 @@ def merge(
 
     merge_result = merge_core(
         dict_like_objects,
-        compat,
-        join,
+        compat=compat,
+        join=join,
         combine_attrs=combine_attrs,
         fill_value=fill_value,
     )
@@ -987,8 +1018,8 @@ def dataset_merge_method(
     dataset: Dataset,
     other: CoercibleMapping,
     overwrite_vars: Hashable | Iterable[Hashable],
-    compat: CompatOptions,
-    join: JoinOptions,
+    compat: CompatOptions | CombineKwargDefault,
+    join: JoinOptions | CombineKwargDefault,
     fill_value: Any,
     combine_attrs: CombineAttrsOptions,
 ) -> _MergeResult:
@@ -1021,8 +1052,8 @@ def dataset_merge_method(
 
     return merge_core(
         objs,
-        compat,
-        join,
+        compat=compat,
+        join=join,
         priority_arg=priority_arg,
         fill_value=fill_value,
         combine_attrs=combine_attrs,
@@ -1054,6 +1085,8 @@ def dataset_update_method(dataset: Dataset, other: CoercibleMapping) -> _MergeRe
 
     return merge_core(
         [dataset, other],
+        compat="broadcast_equals",
+        join="outer",
         priority_arg=1,
         indexes=dataset.xindexes,
         combine_attrs="override",
diff --git a/xarray/core/options.py b/xarray/core/options.py
index 2d69e4b6584..f17cd8ab9d0 100644
--- a/xarray/core/options.py
+++ b/xarray/core/options.py
@@ -29,6 +29,7 @@
     "keep_attrs",
     "warn_for_unclosed_files",
     "use_bottleneck",
+    "use_new_combine_kwarg_defaults",
     "use_numbagg",
     "use_opt_einsum",
     "use_flox",
@@ -57,6 +58,7 @@ class T_Options(TypedDict):
     warn_for_unclosed_files: bool
     use_bottleneck: bool
     use_flox: bool
+    use_new_combine_kwarg_defaults: bool
     use_numbagg: bool
     use_opt_einsum: bool
 
@@ -84,6 +86,7 @@
     "warn_for_unclosed_files": False,
     "use_bottleneck": True,
     "use_flox": True,
+    "use_new_combine_kwarg_defaults": False,
     "use_numbagg": True,
     "use_opt_einsum": True,
 }
@@ -113,6 +116,7 @@ def _positive_integer(value: Any) -> bool:
     "file_cache_maxsize": _positive_integer,
     "keep_attrs": lambda choice: choice in [True, False, "default"],
     "use_bottleneck": lambda value: isinstance(value, bool),
+    "use_new_combine_kwarg_defaults": lambda value: isinstance(value, bool),
     "use_numbagg": lambda value: isinstance(value, bool),
     "use_opt_einsum": lambda value: isinstance(value, bool),
     "use_flox": lambda value: isinstance(value, bool),
@@ -250,6 +254,10 @@ class set_options:
     use_flox : bool, default: True
         Whether to use ``numpy_groupies`` and ``flox`` to accelerate groupby and
         resampling reductions.
+    use_new_combine_kwarg_defaults : bool, default: False
+        Whether to use new kwarg default values for combine functions:
+        ``concat``, ``merge``, ``combine_by_coords``, ``combine_nested`` and
+        ``open_mfdataset``.
     use_numbagg : bool, default: True
         Whether to use ``numbagg`` to accelerate reductions.
         Takes precedence over ``use_bottleneck`` when both are True.
diff --git a/xarray/plot/dataarray_plot.py b/xarray/plot/dataarray_plot.py
index cca9fe4f561..9663303276e 100644
--- a/xarray/plot/dataarray_plot.py
+++ b/xarray/plot/dataarray_plot.py
@@ -196,7 +196,14 @@ def _prepare_plot1d_data(
             dim = coords_to_plot.get(v, None)
             if (dim is not None) and (dim in darray.dims):
                 darray_nan = np.nan * darray.isel({dim: -1})
-                darray = concat([darray, darray_nan], dim=dim)
+                darray = concat(
+                    [darray, darray_nan],
+                    dim=dim,
+                    data_vars="all",
+                    coords="different",
+                    compat="equals",
+                    join="outer",
+                )
                 dims_T.append(coords_to_plot[v])
 
     # Lines should never connect to the same coordinate when stacked,
diff --git a/xarray/util/deprecation_helpers.py b/xarray/util/deprecation_helpers.py
index 1064082872d..44c13560736 100644
--- a/xarray/util/deprecation_helpers.py
+++ b/xarray/util/deprecation_helpers.py
@@ -35,9 +35,10 @@
 import warnings
 from collections.abc import Callable
 from functools import wraps
-from typing import TypeVar
+from typing import Any, TypeVar
 
-from xarray.core.utils import emit_user_level_warning
+from xarray.core.options import OPTIONS
+from xarray.core.utils import ReprObject, emit_user_level_warning
 
 T = TypeVar("T", bound=Callable)
 
@@ -145,3 +146,59 @@ def wrapper(*args, **kwargs):
     # We're quite confident we're just returning `T` from this function, so it's fine to ignore typing
     # within the function.
     return wrapper  # type: ignore[return-value]
+
+
+class CombineKwargDefault(ReprObject):
+    """Object that handles deprecation cycle for kwarg default values."""
+
+    _old: str
+    _new: str
+    _name: str
+
+    def __init__(self, *, name: str, old: str, new: str):
+        self._name = name
+        self._old = old
+        self._new = new
+
+    def __eq__(self, other: ReprObject | Any) -> bool:
+        # TODO: What type can other be? ArrayLike?
+        return (
+            self._value == other._value
+            if isinstance(other, ReprObject)
+            else self._value == other
+        )
+
+    @property
+    def _value(self):
+        return self._new if OPTIONS["use_new_combine_kwarg_defaults"] else self._old
+
+    def __hash__(self) -> int:
+        return hash(self._value)
+
+    def warning_message(self, message: str, recommend_set_options: bool = True):
+        if recommend_set_options:
+            recommendation = (
+                " To opt in to new defaults and get rid of these warnings now "
+                "use `set_options(use_new_combine_kwarg_defaults=True)` or "
+                f"set {self._name} explicitly."
+            )
+        else:
+            recommendation = (
+                f" The recommendation is to set {self._name} explicitly for this case."
+            )
+
+        return (
+            f"In a future version of xarray the default value for {self._name} will "
+            + f"change from {self._name}={self._old!r} to {self._name}={self._new!r}. "
+            + message
+            + recommendation
+        )
+
+
+_DATA_VARS_DEFAULT = CombineKwargDefault(name="data_vars", old="all", new="minimal")
+_COORDS_DEFAULT = CombineKwargDefault(name="coords", old="different", new="minimal")
+_COMPAT_CONCAT_DEFAULT = CombineKwargDefault(
+    name="compat", old="equals", new="override"
+)
+_COMPAT_DEFAULT = CombineKwargDefault(name="compat", old="no_conflicts", new="override")
+_JOIN_DEFAULT = CombineKwargDefault(name="join", old="outer", new="exact")
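
Finally, an illustrative sketch of the sentinel mechanics themselves (assumes
a checkout with this patch applied; the underscore-prefixed names are
private):

    from xarray import set_options
    from xarray.util.deprecation_helpers import _JOIN_DEFAULT

    # A sentinel compares equal to whichever default is currently active ...
    assert _JOIN_DEFAULT == "outer"
    with set_options(use_new_combine_kwarg_defaults=True):
        assert _JOIN_DEFAULT == "exact"

    # ... and builds the FutureWarning text used throughout the patch.
    print(_JOIN_DEFAULT.warning_message("Results may change."))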