diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index 83d5afa6a09..e95f710c43c 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -14,7 +14,7 @@
 import uuid
 import warnings
 from collections.abc import Generator, Iterator, Mapping
-from contextlib import ExitStack
+from contextlib import ExitStack, nullcontext
 from io import BytesIO
 from os import listdir
 from pathlib import Path
@@ -4511,13 +4511,14 @@ def setup_files_and_datasets(self, fuzz=0):
         # to test join='exact'
         ds1["x"] = ds1.x + fuzz
 
-        with create_tmp_file() as tmpfile1:
-            with create_tmp_file() as tmpfile2:
-                # save data to the temporary files
-                ds1.to_netcdf(tmpfile1)
-                ds2.to_netcdf(tmpfile2)
+        with set_options(use_new_combine_kwarg_defaults=True):
+            with create_tmp_file() as tmpfile1:
+                with create_tmp_file() as tmpfile2:
+                    # save data to the temporary files
+                    ds1.to_netcdf(tmpfile1)
+                    ds2.to_netcdf(tmpfile2)
 
-                yield [tmpfile1, tmpfile2], [ds1, ds2]
+                    yield [tmpfile1, tmpfile2], [ds1, ds2]
 
     def gen_datasets_with_common_coord_and_time(self):
         # create coordinate data
@@ -4554,11 +4555,19 @@ def test_open_mfdataset_does_same_as_concat(
         if combine == "by_coords":
             files.reverse()
         with open_mfdataset(
-            files, data_vars=opt, combine=combine, concat_dim=concat_dim, join=join
+            files,
+            data_vars=opt,
+            combine=combine,
+            concat_dim=concat_dim,
+            join=join,
+            compat="no_conflicts",
         ) as ds:
-            ds_expect = xr.concat([ds1, ds2], data_vars=opt, dim="t", join=join)
+            ds_expect = xr.concat(
+                [ds1, ds2], data_vars=opt, dim="t", join=join, compat="equals"
+            )
             assert_identical(ds, ds_expect)
 
+    @pytest.mark.parametrize("use_new_combine_kwarg_defaults", [True, False])
     @pytest.mark.parametrize(
         ["combine_attrs", "attrs", "expected", "expect_error"],
         (
@@ -4586,7 +4595,12 @@
         ),
     )
     def test_open_mfdataset_dataset_combine_attrs(
-        self, combine_attrs, attrs, expected, expect_error
+        self,
+        use_new_combine_kwarg_defaults,
+        combine_attrs,
+        attrs,
+        expected,
+        expect_error,
     ):
         with self.setup_files_and_datasets() as (files, [ds1, ds2]):
             # Give the files an inconsistent attribute
@@ -4596,22 +4610,24 @@
                 ds.close()
                 ds.to_netcdf(f)
 
-            if expect_error:
-                with pytest.raises(xr.MergeError):
-                    xr.open_mfdataset(
-                        files,
-                        combine="nested",
-                        concat_dim="t",
-                        combine_attrs=combine_attrs,
-                    )
-            else:
-                with xr.open_mfdataset(
-                    files,
-                    combine="nested",
-                    concat_dim="t",
-                    combine_attrs=combine_attrs,
-                ) as ds:
-                    assert ds.attrs == expected
+            with set_options(
+                use_new_combine_kwarg_defaults=use_new_combine_kwarg_defaults
+            ):
+                warning = (
+                    pytest.warns(FutureWarning)
+                    if not use_new_combine_kwarg_defaults
+                    else nullcontext()
+                )
+                error = pytest.raises(xr.MergeError) if expect_error else nullcontext()
+                with warning:
+                    with error:
+                        with xr.open_mfdataset(
+                            files,
+                            combine="nested",
+                            concat_dim="t",
+                            combine_attrs=combine_attrs,
+                        ) as ds:
+                            assert ds.attrs == expected
 
     def test_open_mfdataset_dataset_attr_by_coords(self) -> None:
         """
@@ -4640,30 +4656,65 @@ def test_open_mfdataset_dataarray_attr_by_coords(self) -> None:
                 ds.close()
                 ds.to_netcdf(f)
 
-            with xr.open_mfdataset(files, combine="nested", concat_dim="t") as ds:
+            with xr.open_mfdataset(
+                files, data_vars="minimal", combine="nested", concat_dim="t"
+            ) as ds:
                 assert ds["v1"].test_dataarray_attr == 0
 
     @pytest.mark.parametrize(
         "combine, concat_dim", [("nested", "t"), ("by_coords", None)]
     )
-    @pytest.mark.parametrize("opt", ["all", "minimal", "different"])
+    @pytest.mark.parametrize(
+        "kwargs",
+        [
+            {"data_vars": "all"},
+            {"data_vars": "minimal"},
+            {
+                "data_vars": "all",
+                "coords": "different",
+                "compat": "no_conflicts",
+            },  # old defaults
+            {
+                "data_vars": "minimal",
+                "coords": "minimal",
+                "compat": "override",
+            },  # new defaults
+            {"data_vars": "different", "compat": "no_conflicts"},
+            {},
+        ],
+    )
    def test_open_mfdataset_exact_join_raises_error(
-        self, combine, concat_dim, opt
+        self, combine, concat_dim, kwargs
    ) -> None:
-        with self.setup_files_and_datasets(fuzz=0.1) as (files, [ds1, ds2]):
+        with self.setup_files_and_datasets(fuzz=0.1) as (files, _):
             if combine == "by_coords":
                 files.reverse()
             with pytest.raises(
-                ValueError, match=r"cannot align objects.*join.*exact.*"
+                ValueError, match="cannot align objects with join='exact'"
             ):
                 open_mfdataset(
                     files,
-                    data_vars=opt,
+                    **kwargs,
                     combine=combine,
                     concat_dim=concat_dim,
                     join="exact",
                 )
 
+    def test_open_mfdataset_defaults_with_exact_join_warns_as_well_as_raising(
+        self,
+    ) -> None:
+        with self.setup_files_and_datasets(fuzz=0.1) as (files, _):
+            with set_options(use_new_combine_kwarg_defaults=False):
+                files.reverse()
+                with pytest.warns(
+                    FutureWarning,
+                    match="will change from data_vars='all' to data_vars='minimal'",
+                ):
+                    with pytest.raises(
+                        ValueError, match="cannot align objects with join='exact'"
+                    ):
+                        open_mfdataset(files, combine="by_coords", join="exact")
+
     def test_common_coord_when_datavars_all(self) -> None:
         opt: Final = "all"
@@ -4711,6 +4762,50 @@ def test_invalid_data_vars_value_should_fail(self) -> None:
             with open_mfdataset(files, coords="minimum", combine="by_coords"):
                 pass
 
+    @pytest.mark.parametrize(
+        "combine, concat_dim", [("nested", "t"), ("by_coords", None)]
+    )
+    @pytest.mark.parametrize(
+        "kwargs", [{"data_vars": "different"}, {"coords": "different"}]
+    )
+    def test_open_mfdataset_warns_when_kwargs_set_to_different(
+        self, combine, concat_dim, kwargs
+    ) -> None:
+        with self.setup_files_and_datasets() as (files, [ds1, ds2]):
+            if combine == "by_coords":
+                files.reverse()
+            with pytest.raises(
+                ValueError, match="Previously the default was compat='no_conflicts'"
+            ):
+                open_mfdataset(files, combine=combine, concat_dim=concat_dim, **kwargs)
+            with pytest.raises(
+                ValueError, match="Previously the default was compat='equals'"
+            ):
+                xr.concat([ds1, ds2], dim="t", **kwargs)
+
+            with set_options(use_new_combine_kwarg_defaults=False):
+                if "data_vars" not in kwargs:
+                    expectation = pytest.warns(
+                        FutureWarning,
+                        match="will change from data_vars='all'",
+                    )
+                else:
+                    expectation = nullcontext()
+                with pytest.warns(
+                    FutureWarning,
+                    match="will change from compat='equals'",
+                ):
+                    with expectation:
+                        ds_expect = xr.concat([ds1, ds2], dim="t", **kwargs)
+                with pytest.warns(
+                    FutureWarning, match="will change from compat='no_conflicts'"
+                ):
+                    with expectation:
+                        with open_mfdataset(
+                            files, combine=combine, concat_dim=concat_dim, **kwargs
+                        ) as ds:
+                            assert_identical(ds, ds_expect)
+
 
 @requires_dask
 @requires_scipy
@@ -4966,11 +5061,58 @@ def test_encoding_mfdataset(self) -> None:
                 ds2.t.encoding["units"] = "days since 2000-01-01"
                 ds1.to_netcdf(tmp1)
                 ds2.to_netcdf(tmp2)
-                with open_mfdataset([tmp1, tmp2], combine="nested") as actual:
+                with open_mfdataset(
+                    [tmp1, tmp2], combine="nested", compat="no_conflicts", join="outer"
+                ) as actual:
                     assert actual.t.encoding["units"] == original.t.encoding["units"]
                     assert actual.t.encoding["units"] == ds1.t.encoding["units"]
                     assert actual.t.encoding["units"] != ds2.t.encoding["units"]
 
+    def test_encoding_mfdataset_new_defaults(self) -> None:
+        original = Dataset(
+            {
+                "foo": ("t", np.random.randn(10)),
+                "t": ("t", pd.date_range(start="2010-01-01", periods=10, freq="1D")),
+            }
+        )
+        original.t.encoding["units"] = "days since 2010-01-01"
+
+        with create_tmp_file() as tmp1:
+            with create_tmp_file() as tmp2:
+                ds1 = original.isel(t=slice(5))
+                ds2 = original.isel(t=slice(5, 10))
+                ds1.t.encoding["units"] = "days since 2010-01-01"
+                ds2.t.encoding["units"] = "days since 2000-01-01"
+                ds1.to_netcdf(tmp1)
+                ds2.to_netcdf(tmp2)
+
+                with set_options(use_new_combine_kwarg_defaults=False):
+                    with pytest.warns(
+                        FutureWarning,
+                        match="will change from join='outer' to join='exact'",
+                    ):
+                        with pytest.warns(
+                            FutureWarning,
+                            match="will change from compat='no_conflicts' to compat='override'",
+                        ):
+                            with open_mfdataset([tmp1, tmp2], combine="nested") as old:
+                                assert (
+                                    old.t.encoding["units"]
+                                    == original.t.encoding["units"]
+                                )
+                                assert (
+                                    old.t.encoding["units"] == ds1.t.encoding["units"]
+                                )
+                                assert (
+                                    old.t.encoding["units"] != ds2.t.encoding["units"]
+                                )
+
+                with set_options(use_new_combine_kwarg_defaults=True):
+                    with pytest.raises(
+                        ValueError, match="Error might be related to new default"
+                    ):
+                        open_mfdataset([tmp1, tmp2], combine="nested")
+
     def test_preprocess_mfdataset(self) -> None:
         original = Dataset({"foo": ("x", np.random.randn(10))})
         with create_tmp_file() as tmp:
@@ -5053,25 +5195,21 @@ def test_open_and_do_math(self) -> None:
             actual = 1.0 * ds
             assert_allclose(original, actual, decode_bytes=False)
 
-    def test_open_mfdataset_concat_dim_none(self) -> None:
-        with create_tmp_file() as tmp1:
-            with create_tmp_file() as tmp2:
-                data = Dataset({"x": 0})
-                data.to_netcdf(tmp1)
-                Dataset({"x": np.nan}).to_netcdf(tmp2)
-                with open_mfdataset(
-                    [tmp1, tmp2], concat_dim=None, combine="nested"
-                ) as actual:
-                    assert_identical(data, actual)
-
-    def test_open_mfdataset_concat_dim_default_none(self) -> None:
-        with create_tmp_file() as tmp1:
-            with create_tmp_file() as tmp2:
-                data = Dataset({"x": 0})
-                data.to_netcdf(tmp1)
-                Dataset({"x": np.nan}).to_netcdf(tmp2)
-                with open_mfdataset([tmp1, tmp2], combine="nested") as actual:
-                    assert_identical(data, actual)
+    @pytest.mark.parametrize(
+        "kwargs",
+        [pytest.param({"concat_dim": None}, id="none"), pytest.param({}, id="default")],
+    )
+    def test_open_mfdataset_concat_dim(self, kwargs) -> None:
+        with set_options(use_new_combine_kwarg_defaults=True):
+            with create_tmp_file() as tmp1:
+                with create_tmp_file() as tmp2:
+                    data = Dataset({"x": 0})
+                    data.to_netcdf(tmp1)
+                    Dataset({"x": np.nan}).to_netcdf(tmp2)
+                    with open_mfdataset(
+                        [tmp1, tmp2], **kwargs, combine="nested"
+                    ) as actual:
+                        assert_identical(data, actual)
 
     def test_open_dataset(self) -> None:
         original = Dataset({"foo": ("x", np.random.randn(10))})
@@ -5098,7 +5236,9 @@ def test_open_single_dataset(self) -> None:
         )
         with create_tmp_file() as tmp:
             original.to_netcdf(tmp)
-            with open_mfdataset([tmp], concat_dim=dim, combine="nested") as actual:
+            with open_mfdataset(
+                [tmp], concat_dim=dim, data_vars="all", combine="nested"
+            ) as actual:
                 assert_identical(expected, actual)
 
     def test_open_multi_dataset(self) -> None:
@@ -5122,7 +5262,7 @@ def test_open_multi_dataset(self) -> None:
             original.to_netcdf(tmp1)
             original.to_netcdf(tmp2)
             with open_mfdataset(
-                [tmp1, tmp2], concat_dim=dim, combine="nested"
+                [tmp1, tmp2], concat_dim=dim, data_vars="all", combine="nested"
             ) as actual:
                 assert_identical(expected, actual)
 
@@ -6579,19 +6719,20 @@ def test_zarr_safe_chunk_region(self, mode: Literal["r+", "a"]):
 @requires_h5netcdf
 @requires_fsspec
 def test_h5netcdf_storage_options() -> None:
-    with create_tmp_files(2, allow_cleanup_failure=ON_WINDOWS) as (f1, f2):
-        ds1 = create_test_data()
-        ds1.to_netcdf(f1, engine="h5netcdf")
+    with set_options(use_new_combine_kwarg_defaults=True):
+        with create_tmp_files(2, allow_cleanup_failure=ON_WINDOWS) as (f1, f2):
+            ds1 = create_test_data()
+            ds1.to_netcdf(f1, engine="h5netcdf")
 
-        ds2 = create_test_data()
-        ds2.to_netcdf(f2, engine="h5netcdf")
+            ds2 = create_test_data()
+            ds2.to_netcdf(f2, engine="h5netcdf")
 
-        files = [f"file://{f}" for f in [f1, f2]]
-        ds = xr.open_mfdataset(
-            files,
-            engine="h5netcdf",
-            concat_dim="time",
-            combine="nested",
-            storage_options={"skip_instance_cache": False},
-        )
-        assert_identical(xr.concat([ds1, ds2], dim="time"), ds)
+            files = [f"file://{f}" for f in [f1, f2]]
+            ds = xr.open_mfdataset(
+                files,
+                engine="h5netcdf",
+                concat_dim="time",
+                combine="nested",
+                storage_options={"skip_instance_cache": False},
+            )
+            assert_identical(xr.concat([ds1, ds2], dim="time"), ds)
diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py
index 956bac350a2..c1d61a6f424 100644
--- a/xarray/tests/test_combine.py
+++ b/xarray/tests/test_combine.py
@@ -13,6 +13,7 @@
     combine_nested,
     concat,
     merge,
+    set_options,
 )
 from xarray.core import dtypes
 from xarray.core.combine import (
@@ -295,7 +296,7 @@ def test_concat_once(self, create_combined_ids, concat_dim):
             combine_attrs="drop",
         )
 
-        expected_ds = concat([ds(0), ds(1)], dim=concat_dim)
+        expected_ds = concat([ds(0), ds(1)], data_vars="all", dim=concat_dim)
         assert_combined_tile_ids_equal(result, {(): expected_ds})
 
     def test_concat_only_first_dim(self, create_combined_ids):
@@ -340,7 +341,9 @@ def test_concat_twice(self, create_combined_ids, concat_dim):
         partway1 = concat([ds(0), ds(3)], dim="dim1")
         partway2 = concat([ds(1), ds(4)], dim="dim1")
         partway3 = concat([ds(2), ds(5)], dim="dim1")
-        expected = concat([partway1, partway2, partway3], dim=concat_dim)
+        expected = concat(
+            [partway1, partway2, partway3], data_vars="all", dim=concat_dim
+        )
 
         assert_equal(result, expected)
 
@@ -432,7 +435,7 @@ def test_nested_concat_along_new_dim(self):
             Dataset({"a": ("x", [20]), "x": [0]}),
         ]
         expected = Dataset({"a": (("t", "x"), [[10], [20]]), "x": [0]})
-        actual = combine_nested(objs, concat_dim="t")
+        actual = combine_nested(objs, data_vars="all", concat_dim="t")
         assert_identical(expected, actual)
 
         # Same but with a DataArray as new dim, see GH #1988 and #2647
@@ -440,42 +443,51 @@
         expected = Dataset(
             {"a": (("baz", "x"), [[10], [20]]), "x": [0], "baz": [100, 150]}
         )
-        actual = combine_nested(objs, concat_dim=dim)
+        actual = combine_nested(objs, data_vars="all", concat_dim=dim)
         assert_identical(expected, actual)
 
-    def test_nested_merge(self):
+    def test_nested_merge_with_self(self):
         data = Dataset({"x": 0})
-        actual = combine_nested([data, data, data], concat_dim=None)
+        actual = combine_nested(
+            [data, data, data], compat="no_conflicts", concat_dim=None
+        )
         assert_identical(data, actual)
 
+    def test_nested_merge_with_overlapping_values(self):
         ds1 = Dataset({"a": ("x", [1, 2]), "x": [0, 1]})
         ds2 = Dataset({"a": ("x", [2, 3]), "x": [1, 2]})
         expected = Dataset({"a": ("x", [1, 2, 3]), "x": [0, 1, 2]})
-        actual = combine_nested([ds1, ds2], concat_dim=None)
+        actual = combine_nested(
+            [ds1, ds2], join="outer", compat="no_conflicts", concat_dim=None
+        )
         assert_identical(expected, actual)
-        actual = combine_nested([ds1, ds2], concat_dim=[None])
+        actual = combine_nested(
+            [ds1, ds2], join="outer", compat="no_conflicts", concat_dim=[None]
+        )
         assert_identical(expected, actual)
 
+    def test_nested_merge_with_nan(self):
         tmp1 = Dataset({"x": 0})
         tmp2 = Dataset({"x": np.nan})
-        actual = combine_nested([tmp1, tmp2], concat_dim=None)
+        actual = combine_nested([tmp1, tmp2], compat="no_conflicts", concat_dim=None)
         assert_identical(tmp1, actual)
-        actual = combine_nested([tmp1, tmp2], concat_dim=[None])
+        actual = combine_nested([tmp1, tmp2], compat="no_conflicts", concat_dim=[None])
         assert_identical(tmp1, actual)
 
-        # Single object, with a concat_dim explicitly provided
+    def test_nested_merge_with_concat_dim_explicitly_provided(self):
         # Test the issue reported in GH #1988
         objs = [Dataset({"x": 0, "y": 1})]
         dim = DataArray([100], name="baz", dims="baz")
-        actual = combine_nested(objs, concat_dim=[dim])
+        actual = combine_nested(objs, concat_dim=[dim], data_vars="all")
         expected = Dataset({"x": ("baz", [0]), "y": ("baz", [1])}, {"baz": [100]})
         assert_identical(expected, actual)
 
+    def test_nested_merge_with_non_scalars(self):
         # Just making sure that auto_combine is doing what is
         # expected for non-scalar values, too.
         objs = [Dataset({"x": ("z", [0, 1]), "y": ("z", [1, 2])})]
         dim = DataArray([100], name="baz", dims="baz")
-        actual = combine_nested(objs, concat_dim=[dim])
+        actual = combine_nested(objs, concat_dim=[dim], data_vars="all")
         expected = Dataset(
             {"x": (("baz", "z"), [[0, 1]]), "y": (("baz", "z"), [[1, 2]])},
             {"baz": [100]},
@@ -525,10 +537,15 @@ def test_auto_combine_2d(self):
         partway1 = concat([ds(0), ds(3)], dim="dim1")
         partway2 = concat([ds(1), ds(4)], dim="dim1")
         partway3 = concat([ds(2), ds(5)], dim="dim1")
-        expected = concat([partway1, partway2, partway3], dim="dim2")
+        expected = concat([partway1, partway2, partway3], data_vars="all", dim="dim2")
 
         datasets = [[ds(0), ds(1), ds(2)], [ds(3), ds(4), ds(5)]]
-        result = combine_nested(datasets, concat_dim=["dim1", "dim2"])
+        result = combine_nested(
+            datasets,
+            data_vars="all",
+            compat="no_conflicts",
+            concat_dim=["dim1", "dim2"],
+        )
         assert_equal(result, expected)
 
     def test_auto_combine_2d_combine_attrs_kwarg(self):
@@ -537,7 +554,7 @@
         partway1 = concat([ds(0), ds(3)], dim="dim1")
         partway2 = concat([ds(1), ds(4)], dim="dim1")
         partway3 = concat([ds(2), ds(5)], dim="dim1")
-        expected = concat([partway1, partway2, partway3], dim="dim2")
+        expected = concat([partway1, partway2, partway3], data_vars="all", dim="dim2")
 
         expected_dict = {}
         expected_dict["drop"] = expected.copy(deep=True)
@@ -568,12 +585,20 @@
 
         with pytest.raises(ValueError, match=r"combine_attrs='identical'"):
             result = combine_nested(
-                datasets, concat_dim=["dim1", "dim2"], combine_attrs="identical"
+                datasets,
+                concat_dim=["dim1", "dim2"],
+                data_vars="all",
+                compat="no_conflicts",
+                combine_attrs="identical",
             )
 
         for combine_attrs in expected_dict:
             result = combine_nested(
-                datasets, concat_dim=["dim1", "dim2"], combine_attrs=combine_attrs
+                datasets,
+                concat_dim=["dim1", "dim2"],
+                data_vars="all",
+                compat="no_conflicts",
+                combine_attrs=combine_attrs,
             )
             assert_identical(result, expected_dict[combine_attrs])
 
@@ -587,7 +612,7 @@ def test_combine_nested_missing_data_new_dim(self):
         expected = Dataset(
             {"a": (("t", "x"), [[np.nan, 2, 3], [1, 2, np.nan]])}, {"x": [0, 1, 2]}
         )
-        actual = combine_nested(datasets, concat_dim="t")
+        actual = combine_nested(datasets, data_vars="all", join="outer", concat_dim="t")
         assert_identical(expected, actual)
 
     def test_invalid_hypercube_input(self):
@@ -665,7 +690,13 @@ def test_combine_nested_fill_value(self, fill_value):
             },
             {"x": [0, 1, 2]},
         )
-        actual = combine_nested(datasets, concat_dim="t", fill_value=fill_value)
+        actual = combine_nested(
+            datasets,
+            concat_dim="t",
+            data_vars="all",
+            join="outer",
+            fill_value=fill_value,
+        )
         assert_identical(expected, actual)
 
     def test_combine_nested_unnamed_data_arrays(self):
@@ -725,26 +756,30 @@ def test_combine_by_coords(self):
         expected = Dataset({"x": [0, 1, 2]})
         assert_identical(expected, actual)
 
+    def test_combine_by_coords_handles_non_sorted_variables(self):
         # ensure auto_combine handles non-sorted variables
         objs = [
             Dataset({"x": ("a", [0]), "y": ("a", [0]), "a": [0]}),
             Dataset({"x": ("a", [1]), "y": ("a", [1]), "a": [1]}),
         ]
-        actual = combine_by_coords(objs)
+        actual = combine_by_coords(objs, join="outer")
         expected = Dataset({"x": ("a", [0, 1]), "y": ("a", [0, 1]), "a": [0, 1]})
         assert_identical(expected, actual)
 
+    def test_combine_by_coords_multiple_variables(self):
         objs = [Dataset({"x": [0], "y": [0]}), Dataset({"y": [1], "x": [1]})]
-        actual = combine_by_coords(objs)
+        actual = combine_by_coords(objs, join="outer")
         expected = Dataset({"x": [0, 1], "y": [0, 1]})
         assert_equal(actual, expected)
 
+    def test_combine_by_coords_for_scalar_variables(self):
         objs = [Dataset({"x": 0}), Dataset({"x": 1})]
         with pytest.raises(
             ValueError, match=r"Could not find any dimension coordinates"
         ):
             combine_by_coords(objs)
 
+    def test_combine_by_coords_requires_coord_or_index(self):
         objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [0]})]
         with pytest.raises(
             ValueError,
@@ -960,7 +995,9 @@ def test_combine_by_coords_combine_attrs_variables(
             with pytest.raises(MergeError, match="combine_attrs"):
                 combine_by_coords([data1, data2], combine_attrs=combine_attrs)
         else:
-            actual = combine_by_coords([data1, data2], combine_attrs=combine_attrs)
+            actual = combine_by_coords(
+                [data1, data2], data_vars="all", combine_attrs=combine_attrs
+            )
             expected = Dataset(
                 {
                     "x": ("a", [0, 1], expected_attrs),
@@ -974,7 +1011,7 @@
     def test_infer_order_from_coords(self):
         data = create_test_data()
         objs = [data.isel(dim2=slice(4, 9)), data.isel(dim2=slice(4))]
-        actual = combine_by_coords(objs)
+        actual = combine_by_coords(objs, data_vars="all", compat="no_conflicts")
         expected = data
         assert expected.broadcast_equals(actual)
 
@@ -1012,7 +1049,7 @@ def test_combine_by_coords_previously_failed(self):
             Dataset({"a": ("x", [1]), "x": [1]}),
         ]
         expected = Dataset({"a": ("x", [0, 1]), "b": ("x", [0, np.nan])}, {"x": [0, 1]})
-        actual = combine_by_coords(datasets)
+        actual = combine_by_coords(datasets, join="outer")
         assert_identical(expected, actual)
 
     def test_combine_by_coords_still_fails(self):
@@ -1029,7 +1066,7 @@ def test_combine_by_coords_no_concat(self):
         assert_identical(expected, actual)
 
         objs = [Dataset({"x": 0, "y": 1}), Dataset({"y": np.nan, "z": 2})]
-        actual = combine_by_coords(objs)
+        actual = combine_by_coords(objs, compat="no_conflicts")
         expected = Dataset({"x": 0, "y": 1, "z": 2})
         assert_identical(expected, actual)
 
@@ -1047,7 +1084,7 @@ def test_combine_by_coords_incomplete_hypercube(self):
         x1 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [0], "x": [0]})
         x2 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [1], "x": [0]})
         x3 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [0], "x": [1]})
-        actual = combine_by_coords([x1, x2, x3])
+        actual = combine_by_coords([x1, x2, x3], join="outer")
         expected = Dataset(
             {"a": (("y", "x"), [[1, 1], [1, np.nan]])},
             coords={"y": [0, 1], "x": [0, 1]},
@@ -1055,8 +1092,10 @@
         assert_identical(expected, actual)
 
         # test that this fails if fill_value is None
-        with pytest.raises(ValueError):
-            combine_by_coords([x1, x2, x3], fill_value=None)
+        with pytest.raises(
+            ValueError, match="supplied objects do not form a hypercube"
+        ):
+            combine_by_coords([x1, x2, x3], join="outer", fill_value=None)
 
     def test_combine_by_coords_override_order(self) -> None:
         # regression test for https://github.com/pydata/xarray/issues/8828
@@ -1126,7 +1165,7 @@ def test_combine_by_coords_all_named_dataarrays(self):
         named_da1 = DataArray(name="a", data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x")
         named_da2 = DataArray(name="b", data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x")
 
-        actual = combine_by_coords([named_da1, named_da2])
+        actual = combine_by_coords([named_da1, named_da2], join="outer")
         expected = Dataset(
             {
                 "a": DataArray(data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x"),
@@ -1139,11 +1178,146 @@ def test_combine_by_coords_all_dataarrays_with_the_same_name(self):
         named_da1 = DataArray(name="a", data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x")
         named_da2 = DataArray(name="a", data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x")
 
-        actual = combine_by_coords([named_da1, named_da2])
-        expected = merge([named_da1, named_da2])
+        actual = combine_by_coords(
+            [named_da1, named_da2], compat="no_conflicts", join="outer"
+        )
+        expected = merge([named_da1, named_da2], compat="no_conflicts", join="outer")
         assert_identical(expected, actual)
 
 
+class TestNewDefaults:
+    def test_concat_along_existing_dim(self):
+        concat_dim = "dim1"
+        ds = create_test_data
+        with set_options(use_new_combine_kwarg_defaults=False):
+            old = concat([ds(0), ds(1)], dim=concat_dim)
+        with set_options(use_new_combine_kwarg_defaults=True):
+            new = concat([ds(0), ds(1)], dim=concat_dim)
+
+        assert_identical(old, new)
+
+    def test_concat_along_new_dim(self):
+        concat_dim = "new_dim"
+        ds = create_test_data
+        with set_options(use_new_combine_kwarg_defaults=False):
+            with pytest.warns(
+                FutureWarning,
+                match="will change from data_vars='all' to data_vars='minimal'",
+            ):
+                old = concat([ds(0), ds(1)], dim=concat_dim)
+        with set_options(use_new_combine_kwarg_defaults=True):
+            new = concat([ds(0), ds(1)], dim=concat_dim)
+
+        with pytest.raises(AssertionError):
+            assert_identical(old, new)
+
+    def test_nested_merge_with_overlapping_values(self):
+        ds1 = Dataset({"a": ("x", [1, 2]), "x": [0, 1]})
+        ds2 = Dataset({"a": ("x", [2, 3]), "x": [1, 2]})
+        expected = Dataset({"a": ("x", [1, 2, 3]), "x": [0, 1, 2]})
+        with set_options(use_new_combine_kwarg_defaults=False):
+            with pytest.warns(
+                FutureWarning, match="will change from join='outer' to join='exact'"
+            ):
+                with pytest.warns(
+                    FutureWarning,
+                    match="will change from compat='no_conflicts' to compat='override'",
+                ):
+                    old = combine_nested([ds1, ds2], concat_dim=None)
+        with set_options(use_new_combine_kwarg_defaults=True):
+            with pytest.raises(ValueError, match="might be related to new default"):
+                combine_nested([ds1, ds2], concat_dim=None)
+
+        assert_identical(old, expected)
+
+    def test_nested_merge_with_nan_order_matters(self):
+        ds1 = Dataset({"x": 0})
+        ds2 = Dataset({"x": np.nan})
+        with set_options(use_new_combine_kwarg_defaults=False):
+            with pytest.warns(
+                FutureWarning,
+                match="will change from compat='no_conflicts' to compat='override'",
+            ):
+                old = combine_nested([ds1, ds2], concat_dim=None)
+        with set_options(use_new_combine_kwarg_defaults=True):
+            new = combine_nested([ds1, ds2], concat_dim=None)
+
+        assert_identical(ds1, old)
+        assert_identical(old, new)
+
+        with set_options(use_new_combine_kwarg_defaults=False):
+            with pytest.warns(
+                FutureWarning,
+                match="will change from compat='no_conflicts' to compat='override'",
+            ):
+                old = combine_nested([ds2, ds1], concat_dim=None)
+        with set_options(use_new_combine_kwarg_defaults=True):
+            new = combine_nested([ds2, ds1], concat_dim=None)
+
+        assert_identical(ds1, old)
+        with pytest.raises(AssertionError):
+            assert_identical(old, new)
+
+    def test_nested_merge_with_concat_dim_explicitly_provided(self):
+        # Test the issue reported in GH #1988
+        objs = [Dataset({"x": 0, "y": 1})]
+        dim = DataArray([100], name="baz", dims="baz")
+        expected = Dataset({"x": ("baz", [0]), "y": ("baz", [1])}, {"baz": [100]})
+
+        with set_options(use_new_combine_kwarg_defaults=False):
+            with pytest.warns(
+                FutureWarning,
+                match="will change from data_vars='all' to data_vars='minimal'",
+            ):
+                old = combine_nested(objs, concat_dim=dim)
+        with set_options(use_new_combine_kwarg_defaults=True):
+            new = combine_nested(objs, concat_dim=dim)
+
+        assert_identical(expected, old)
+        with pytest.raises(AssertionError):
+            assert_identical(old, new)
+
+    def test_combine_nested_missing_data_new_dim(self):
+        # Your data includes "time" and "station" dimensions, and each year's
+        # data has a different set of stations.
+        datasets = [
+            Dataset({"a": ("x", [2, 3]), "x": [1, 2]}),
+            Dataset({"a": ("x", [1, 2]), "x": [0, 1]}),
+        ]
+        expected = Dataset(
+            {"a": (("t", "x"), [[np.nan, 2, 3], [1, 2, np.nan]])}, {"x": [0, 1, 2]}
+        )
+        with set_options(use_new_combine_kwarg_defaults=False):
+            with pytest.warns(
+                FutureWarning, match="will change from join='outer' to join='exact'"
+            ):
+                with pytest.warns(
+                    FutureWarning,
+                    match="will change from data_vars='all' to data_vars='minimal'",
+                ):
+                    old = combine_nested(datasets, concat_dim="t")
+        with set_options(use_new_combine_kwarg_defaults=True):
+            with pytest.raises(ValueError, match="might be related to new default"):
+                combine_nested(datasets, concat_dim="t")
+
+        assert_identical(expected, old)
+
+    def test_combine_by_coords_multiple_variables(self):
+        objs = [Dataset({"x": [0], "y": [0]}), Dataset({"y": [1], "x": [1]})]
+        expected = Dataset({"x": [0, 1], "y": [0, 1]})
+
+        with set_options(use_new_combine_kwarg_defaults=False):
+            with pytest.warns(
+                FutureWarning, match="will change from join='outer' to join='exact'"
+            ):
+                old = combine_by_coords(objs)
+        with set_options(use_new_combine_kwarg_defaults=True):
+            with pytest.raises(ValueError, match="might be related to new default"):
+                combine_by_coords(objs)
+
+        assert_identical(old, expected)
+
+
 @requires_cftime
 def test_combine_by_coords_distant_cftime_dates():
     # Regression test for https://github.com/pydata/xarray/issues/3535
diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py
index b970781fe28..34ad36a1e12 100644
--- a/xarray/tests/test_dask.py
+++ b/xarray/tests/test_dask.py
@@ -446,7 +446,11 @@ def test_concat_loads_variables(self):
 
         assert kernel_call_count == 0
         out = xr.concat(
-            [ds1, ds2, ds3], dim="n", data_vars="different", coords="different"
+            [ds1, ds2, ds3],
+            dim="n",
+            data_vars="different",
+            coords="different",
+            compat="equals",
         )
         # each kernel is computed exactly once
         assert kernel_call_count == 6
@@ -488,7 +492,11 @@
         # stop computing variables as it would not have any benefit
         ds4 = Dataset(data_vars={"d": ("x", [2.0])}, coords={"c": ("x", [2.0])})
         out = xr.concat(
-            [ds1, ds2, ds4, ds3], dim="n", data_vars="different", coords="different"
+            [ds1, ds2, ds4, ds3],
+            dim="n",
+            data_vars="different",
+            coords="different",
+            compat="equals",
         )
         # the variables of ds1 and ds2 were computed, but those of ds3 didn't
         assert kernel_call_count == 22
@@ -509,7 +517,11 @@
 
         # now check that concat() is correctly using dask name equality to skip loads
         out = xr.concat(
-            [ds1, ds1, ds1], dim="n", data_vars="different", coords="different"
+            [ds1, ds1, ds1],
+            dim="n",
+            data_vars="different",
+            coords="different",
+            compat="equals",
         )
         assert kernel_call_count == 24
         # variables are not loaded in the output
@@ -1375,7 +1387,9 @@ def test_map_blocks_ds_transformations(func, map_ds):
 def test_map_blocks_da_ds_with_template(obj):
     func = lambda x: x.isel(x=[1])
     # a simple .isel(x=[1, 5, 9]) puts all those in a single chunk.
-    template = xr.concat([obj.isel(x=[i]) for i in [1, 5, 9]], dim="x")
+    template = xr.concat(
+        [obj.isel(x=[i]) for i in [1, 5, 9]], data_vars="minimal", dim="x"
+    )
     with raise_if_dask_computes():
         actual = xr.map_blocks(func, obj, template=template)
     assert_identical(actual, template)
@@ -1448,7 +1462,9 @@ def test_map_blocks_errors_bad_template(obj):
         xr.map_blocks(
             lambda a: a.isel(x=[1]).assign_coords(x=[120]),  # assign bad index values
             obj,
-            template=xr.concat([obj.isel(x=[i]) for i in [1, 5, 9]], dim="x"),
+            template=xr.concat(
+                [obj.isel(x=[i]) for i in [1, 5, 9]], data_vars="minimal", dim="x"
+            ),
         ).compute()
 
 
diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
index 75d6d919e19..66546283d4b 100644
--- a/xarray/tests/test_dataarray.py
+++ b/xarray/tests/test_dataarray.py
@@ -1412,12 +1412,25 @@ def test_selection_multiindex_from_level(self) -> None:
         # GH: 3512
         da = DataArray([0, 1], dims=["x"], coords={"x": [0, 1], "y": "a"})
         db = DataArray([2, 3], dims=["x"], coords={"x": [0, 1], "y": "b"})
-        data = xr.concat([da, db], dim="x").set_index(xy=["x", "y"])
+        data = xr.concat(
+            [da, db], dim="x", coords="different", compat="equals"
+        ).set_index(xy=["x", "y"])
         assert data.dims == ("xy",)
         actual = data.sel(y="a")
         expected = data.isel(xy=[0, 1]).unstack("xy").squeeze("y")
         assert_equal(actual, expected)
 
+    def test_concat_with_default_coords_warns(self) -> None:
+        da = DataArray([0, 1], dims=["x"], coords={"x": [0, 1], "y": "a"})
+        db = DataArray([2, 3], dims=["x"], coords={"x": [0, 1], "y": "b"})
+
+        with pytest.warns(FutureWarning):
+            original = xr.concat([da, db], dim="x")
+        with set_options(use_new_combine_kwarg_defaults=True):
+            new = xr.concat([da, db], dim="x")
+
+        assert original.y.shape != new.y.shape
+
     def test_virtual_default_coords(self) -> None:
         array = DataArray(np.zeros((5,)), dims="x")
         expected = DataArray(range(5), dims="x", name="x")
diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index 7be2d13f9dd..28f932c8716 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -6085,7 +6085,7 @@ def test_dataset_math_auto_align(self) -> None:
         assert_equal(actual, expected)
 
         actual = ds + ds[["bar"]]
-        expected = (2 * ds[["bar"]]).merge(ds.coords)
+        expected = (2 * ds[["bar"]]).merge(ds.coords, compat="override")
         assert_identical(expected, actual)
 
         assert_identical(ds + Dataset(), ds.coords.to_dataset())
@@ -6521,12 +6521,12 @@ def test_combine_first(self) -> None:
             coords={"x": ["a", "b", "c"]},
         )
         assert_equal(actual, expected)
-        assert_equal(actual, xr.merge([dsx0, dsx1]))
+        assert_equal(actual, xr.merge([dsx0, dsx1], join="outer"))
 
         # works just like xr.merge([self, other])
         dsy2 = DataArray([2, 2, 2], [("x", ["b", "c", "d"])]).to_dataset(name="dsy2")
         actual = dsx0.combine_first(dsy2)
-        expected = xr.merge([dsy2, dsx0])
+        expected = xr.merge([dsy2, dsx0], join="outer")
         assert_equal(actual, expected)
 
     def test_sortby(self) -> None:
diff --git a/xarray/tests/test_duck_array_wrapping.py b/xarray/tests/test_duck_array_wrapping.py
index 59928dce370..b0c9d40a8cc 100644
--- a/xarray/tests/test_duck_array_wrapping.py
+++ b/xarray/tests/test_duck_array_wrapping.py
@@ -155,7 +155,7 @@ def test_concat(self):
         assert isinstance(result.data, self.Array)
 
     def test_merge(self):
-        result = xr.merge([self.x1, self.x2], compat="override")
+        result = xr.merge([self.x1, self.x2], compat="override", join="outer")
         assert isinstance(result.foo.data, self.Array)
 
     def test_where(self):
diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py
index d42f86f5ea6..be5ec0b28af 100644
--- a/xarray/tests/test_groupby.py
+++ b/xarray/tests/test_groupby.py
@@ -2402,6 +2402,7 @@ def test_resample_min_count(self) -> None:
                 for i in range(3)
             ],
             dim=actual["time"],
+            data_vars="all",
         )
         assert_allclose(expected, actual)
 
diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py
index 52935e9714e..7d346994d6b 100644
--- a/xarray/tests/test_merge.py
+++ b/xarray/tests/test_merge.py
@@ -6,6 +6,7 @@
 import xarray as xr
 from xarray.core import dtypes, merge
 from xarray.core.merge import MergeError
+from xarray.core.options import set_options
 from xarray.testing import assert_equal, assert_identical
 from xarray.tests.test_dataset import create_test_data
 
@@ -36,15 +37,17 @@ def test_merge_arrays(self):
         expected = data[["var1", "var2"]]
         assert_identical(actual, expected)
 
-    def test_merge_datasets(self):
-        data = create_test_data(add_attrs=False, use_extension_array=True)
+    @pytest.mark.parametrize("use_new_combine_kwarg_defaults", [True, False])
+    def test_merge_datasets(self, use_new_combine_kwarg_defaults):
+        with set_options(use_new_combine_kwarg_defaults=use_new_combine_kwarg_defaults):
+            data = create_test_data(add_attrs=False, use_extension_array=True)
 
-        actual = xr.merge([data[["var1"]], data[["var2"]]])
-        expected = data[["var1", "var2"]]
-        assert_identical(actual, expected)
+            actual = xr.merge([data[["var1"]], data[["var2"]]])
+            expected = data[["var1", "var2"]]
+            assert_identical(actual, expected)
 
-        actual = xr.merge([data, data])
-        assert_identical(actual, data)
+            actual = xr.merge([data, data], compat="no_conflicts")
+            assert_identical(actual, data)
 
     def test_merge_dataarray_unnamed(self):
         data = xr.DataArray([1, 2], dims="x")
@@ -191,9 +194,13 @@ def test_merge_arrays_attrs_variables(
 
         if expect_exception:
             with pytest.raises(MergeError, match="combine_attrs"):
-                actual = xr.merge([data1, data2], combine_attrs=combine_attrs)
+                actual = xr.merge(
+                    [data1, data2], compat="no_conflicts", combine_attrs=combine_attrs
+                )
         else:
-            actual = xr.merge([data1, data2], combine_attrs=combine_attrs)
+            actual = xr.merge(
+                [data1, data2], compat="no_conflicts", combine_attrs=combine_attrs
+            )
             expected = xr.Dataset(
                 {"var1": ("dim1", [], expected_attrs)},
                 coords={"dim1": ("dim1", [], expected_attrs)},
@@ -266,8 +273,12 @@ def test_merge_no_conflicts_single_var(self):
         ds1 = xr.Dataset({"a": ("x", [1, 2]), "x": [0, 1]})
         ds2 = xr.Dataset({"a": ("x", [2, 3]), "x": [1, 2]})
         expected = xr.Dataset({"a": ("x", [1, 2, 3]), "x": [0, 1, 2]})
-        assert expected.identical(xr.merge([ds1, ds2], compat="no_conflicts"))
-        assert expected.identical(xr.merge([ds2, ds1], compat="no_conflicts"))
+        assert expected.identical(
+            xr.merge([ds1, ds2], compat="no_conflicts", join="outer")
+        )
+        assert expected.identical(
+            xr.merge([ds2, ds1], compat="no_conflicts", join="outer")
+        )
         assert ds1.identical(xr.merge([ds1, ds2], compat="no_conflicts", join="left"))
         assert ds2.identical(xr.merge([ds1, ds2], compat="no_conflicts", join="right"))
         expected = xr.Dataset({"a": ("x", [2]), "x": [1]})
@@ -277,11 +288,11 @@
 
         with pytest.raises(xr.MergeError):
             ds3 = xr.Dataset({"a": ("x", [99, 3]), "x": [1, 2]})
-            xr.merge([ds1, ds3], compat="no_conflicts")
+            xr.merge([ds1, ds3], compat="no_conflicts", join="outer")
 
         with pytest.raises(xr.MergeError):
             ds3 = xr.Dataset({"a": ("y", [2, 3]), "y": [1, 2]})
-            xr.merge([ds1, ds3], compat="no_conflicts")
+            xr.merge([ds1, ds3], compat="no_conflicts", join="outer")
 
     def test_merge_no_conflicts_multi_var(self):
         data = create_test_data(add_attrs=False)
@@ -303,17 +314,19 @@ def test_merge_no_conflicts_preserve_attrs(self):
         data = xr.Dataset({"x": ([], 0, {"foo": "bar"})})
-        actual = xr.merge([data, data], combine_attrs="no_conflicts")
+        actual = xr.merge(
+            [data, data], compat="no_conflicts", combine_attrs="no_conflicts"
+        )
         assert_identical(data, actual)
 
     def test_merge_no_conflicts_broadcast(self):
         datasets = [xr.Dataset({"x": ("y", [0])}), xr.Dataset({"x": np.nan})]
-        actual = xr.merge(datasets)
+        actual = xr.merge(datasets, compat="no_conflicts")
         expected = xr.Dataset({"x": ("y", [0])})
         assert_identical(expected, actual)
 
         datasets = [xr.Dataset({"x": ("y", [np.nan])}), xr.Dataset({"x": 0})]
-        actual = xr.merge(datasets)
+        actual = xr.merge(datasets, compat="no_conflicts")
         assert_identical(expected, actual)
 
 
@@ -329,27 +342,27 @@ def test_merge(self):
         actual = ds2.merge(ds1)
         assert_identical(expected, actual)
 
-        actual = data.merge(data)
+        actual = data.merge(data, compat="no_conflicts")
         assert_identical(data, actual)
-        actual = data.reset_coords(drop=True).merge(data)
+        actual = data.reset_coords(drop=True).merge(data, compat="no_conflicts")
         assert_identical(data, actual)
-        actual = data.merge(data.reset_coords(drop=True))
+        actual = data.merge(data.reset_coords(drop=True), compat="no_conflicts")
         assert_identical(data, actual)
 
-        with pytest.raises(ValueError):
+        with pytest.raises(ValueError, match="conflicting values for variable"):
             ds1.merge(ds2.rename({"var3": "var1"}))
 
         with pytest.raises(ValueError, match=r"should be coordinates or not"):
-            data.reset_coords().merge(data)
+            data.reset_coords().merge(data, compat="no_conflicts")
         with pytest.raises(ValueError, match=r"should be coordinates or not"):
-            data.merge(data.reset_coords())
+            data.merge(data.reset_coords(), compat="no_conflicts")
 
     def test_merge_broadcast_equals(self):
         ds1 = xr.Dataset({"x": 0})
         ds2 = xr.Dataset({"x": ("y", [0, 0])})
-        actual = ds1.merge(ds2)
+        actual = ds1.merge(ds2, compat="no_conflicts")
         assert_identical(ds2, actual)
-        actual = ds2.merge(ds1)
+        actual = ds2.merge(ds1, compat="override")
         assert_identical(ds2, actual)
 
         actual = ds1.copy()
@@ -358,7 +371,7 @@ def test_merge_broadcast_equals(self):
 
         ds1 = xr.Dataset({"x": np.nan})
         ds2 = xr.Dataset({"x": ("y", [np.nan, np.nan])})
-        actual = ds1.merge(ds2)
+        actual = ds1.merge(ds2, compat="no_conflicts")
         assert_identical(ds2, actual)
 
     def test_merge_compat(self):
@@ -398,8 +411,8 @@ def test_merge_auto_align(self):
         expected = xr.Dataset(
             {"a": ("x", [1, 2, np.nan]), "b": ("x", [np.nan, 3, 4])}, {"x": [0, 1, 2]}
         )
-        assert expected.identical(ds1.merge(ds2))
-        assert expected.identical(ds2.merge(ds1))
+        assert expected.identical(ds1.merge(ds2, join="outer"))
+        assert expected.identical(ds2.merge(ds1, join="outer"))
 
         expected = expected.isel(x=slice(2))
         assert expected.identical(ds1.merge(ds2, join="left"))
@@ -427,17 +440,19 @@ def test_merge_fill_value(self, fill_value):
             {"a": ("x", [1, 2, fill_value_a]), "b": ("x", [fill_value_b, 3, 4])},
             {"x": [0, 1, 2]},
         )
-        assert expected.identical(ds1.merge(ds2, fill_value=fill_value))
-        assert expected.identical(ds2.merge(ds1, fill_value=fill_value))
-        assert expected.identical(xr.merge([ds1, ds2], fill_value=fill_value))
+        assert expected.identical(ds1.merge(ds2, join="outer", fill_value=fill_value))
+        assert expected.identical(ds2.merge(ds1, join="outer", fill_value=fill_value))
+        assert expected.identical(
+            xr.merge([ds1, ds2], join="outer", fill_value=fill_value)
+        )
 
     def test_merge_no_conflicts(self):
         ds1 = xr.Dataset({"a": ("x", [1, 2]), "x": [0, 1]})
         ds2 = xr.Dataset({"a": ("x", [2, 3]), "x": [1, 2]})
         expected = xr.Dataset({"a": ("x", [1, 2, 3]), "x": [0, 1, 2]})
-        assert expected.identical(ds1.merge(ds2, compat="no_conflicts"))
-        assert expected.identical(ds2.merge(ds1, compat="no_conflicts"))
+        assert expected.identical(ds1.merge(ds2, compat="no_conflicts", join="outer"))
+        assert expected.identical(ds2.merge(ds1, compat="no_conflicts", join="outer"))
 
         assert ds1.identical(ds1.merge(ds2, compat="no_conflicts", join="left"))
 
@@ -448,11 +463,11 @@
 
         with pytest.raises(xr.MergeError):
             ds3 = xr.Dataset({"a": ("x", [99, 3]), "x": [1, 2]})
-            ds1.merge(ds3, compat="no_conflicts")
+            ds1.merge(ds3, compat="no_conflicts", join="outer")
 
         with pytest.raises(xr.MergeError):
             ds3 = xr.Dataset({"a": ("y", [2, 3]), "y": [1, 2]})
-            ds1.merge(ds3, compat="no_conflicts")
+            ds1.merge(ds3, compat="no_conflicts", join="outer")
 
     def test_merge_dataarray(self):
         ds = xr.Dataset({"a": 0})
@@ -490,3 +505,80 @@ def test_merge_combine_attrs(
             actual = ds1.merge(ds2, combine_attrs=combine_attrs)
             expected = xr.Dataset(attrs=expected_attrs)
             assert_identical(actual, expected)
+
+
+class TestNewDefaults:
+    def test_merge_datasets_false_warning(self):
+        data = create_test_data(add_attrs=False, use_extension_array=True)
+
+        with set_options(use_new_combine_kwarg_defaults=False):
+            with pytest.warns(
+                FutureWarning,
+                match="will change from compat='no_conflicts' to compat='override'",
+            ):
+                old = xr.merge([data, data])
+
+        with set_options(use_new_combine_kwarg_defaults=True):
+            new = xr.merge([data, data])
+
+        assert_identical(old, new)
+
+    def test_merge(self):
+        data = create_test_data()
+        ds1 = data[["var1"]]
+        ds2 = data[["var3"]]
+        expected = data[["var1", "var3"]]
+        with set_options(use_new_combine_kwarg_defaults=True):
+            actual = ds1.merge(ds2)
+            assert_identical(expected, actual)
+
+            actual = ds2.merge(ds1)
+            assert_identical(expected, actual)
+
+            actual = data.merge(data)
+            assert_identical(data, actual)
+
+            ds1.merge(ds2.rename({"var3": "var1"}))
+
+            with pytest.raises(ValueError, match=r"should be coordinates or not"):
+                data.reset_coords().merge(data)
+            with pytest.raises(ValueError, match=r"should be coordinates or not"):
+                data.merge(data.reset_coords())
+
+    def test_merge_broadcast_equals(self):
+        ds1 = xr.Dataset({"x": 0})
+        ds2 = xr.Dataset({"x": ("y", [0, 0])})
+
+        with set_options(use_new_combine_kwarg_defaults=False):
+            with pytest.warns(
+                FutureWarning,
+                match="will change from compat='no_conflicts' to compat='override'",
+            ):
+                old = ds1.merge(ds2)
+
+        with set_options(use_new_combine_kwarg_defaults=True):
+            new = ds1.merge(ds2)
+
+        assert_identical(ds2, old)
+        with pytest.raises(AssertionError):
+            assert_identical(old, new)
+
+    def test_merge_auto_align(self):
+        ds1 = xr.Dataset({"a": ("x", [1, 2]), "x": [0, 1]})
+        ds2 = xr.Dataset({"b": ("x", [3, 4]), "x": [1, 2]})
+        expected = xr.Dataset(
+            {"a": ("x", [1, 2, np.nan]), "b": ("x", [np.nan, 3, 4])}, {"x": [0, 1, 2]}
+        )
+        with set_options(use_new_combine_kwarg_defaults=False):
+            with pytest.warns(
+                FutureWarning, match="will change from join='outer' to join='exact'"
+            ):
+                assert expected.identical(ds1.merge(ds2))
+            with pytest.warns(
+                FutureWarning, match="will change from join='outer' to join='exact'"
+            ):
+                assert expected.identical(ds2.merge(ds1))
+
+        with set_options(use_new_combine_kwarg_defaults=True):
+            with pytest.raises(ValueError, match="might be related to new default"):
+                expected.identical(ds2.merge(ds1))