From 8681128d511e89d0e6dd592575ef4b714e74486d Mon Sep 17 00:00:00 2001 From: Stella <30465823+stellaprins@users.noreply.github.com> Date: Wed, 22 Jan 2025 12:46:08 +0000 Subject: [PATCH 01/15] add transforms module with scale function --- movement/transforms.py | 14 ++++++ tests/test_unit/test_transforms.py | 77 ++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) create mode 100644 movement/transforms.py create mode 100644 tests/test_unit/test_transforms.py diff --git a/movement/transforms.py b/movement/transforms.py new file mode 100644 index 00000000..2cc4374f --- /dev/null +++ b/movement/transforms.py @@ -0,0 +1,14 @@ +"""Transforms module.""" + +import xarray as xr + + +def scale( + data_array: xr.DataArray, factor: float = 1.0, unit: str | None = None +) -> xr.DataArray: + """Scale data by a given factor with an optional unit.""" + scaled_data_array = data_array * factor + if unit is not None: + scaled_data_array.attrs["unit"] = unit + + return scaled_data_array diff --git a/tests/test_unit/test_transforms.py b/tests/test_unit/test_transforms.py new file mode 100644 index 00000000..d045ea01 --- /dev/null +++ b/tests/test_unit/test_transforms.py @@ -0,0 +1,77 @@ +from typing import Any + +import numpy as np +import pytest +import xarray as xr + +from movement.transforms import scale + + +def nparray_0_to_23() -> np.ndarray: + return np.arange(0, 24).reshape(12, 2) + + +@pytest.fixture +def sample_data_array() -> xr.DataArray: + """Turn the nparray_0_to_23 into a DataArray.""" + return xr.DataArray( + nparray_0_to_23(), + dims=["time", "space"], + coords={"space": ["x", "y"]}, + ) + + +@pytest.mark.parametrize( + ["optional_arguments", "expected_output"], + [ + pytest.param( + {}, + nparray_0_to_23(), + id="Do nothing", + ), + pytest.param( + {"unit": "elephants"}, + xr.DataArray( + nparray_0_to_23(), + dims=["time", "space"], + coords={"space": ["x", "y"]}, + attrs={"unit": "elephants"}, + ), + id="Add example unit", + ), + pytest.param( + {"factor": 2}, + nparray_0_to_23() * 2, + id="Double", + ), + pytest.param( + {"factor": 0.5}, + nparray_0_to_23() * 0.5, + id="Halve", + ), + pytest.param( + {"factor": 0.5, "unit": "elephants"}, + xr.DataArray( + nparray_0_to_23() * 0.5, + dims=["time", "space"], + coords={"space": ["x", "y"]}, + attrs={"unit": "elephants"}, + ), + id="Halve and add example unit", + ), + ], +) +def test_scale( + sample_data_array: xr.DataArray, + optional_arguments: dict[str, Any], + expected_output: xr.DataArray, +): + if isinstance(expected_output, np.ndarray): + expected_output = xr.DataArray( + expected_output, + dims=["time", "space"], + coords={"space": ["x", "y"]}, + ) + + output_data_array = scale(sample_data_array, **optional_arguments) + xr.testing.assert_equal(output_data_array, expected_output) From fd356a0ac26fb56dfd5a86746f721fd171ee61c9 Mon Sep 17 00:00:00 2001 From: Stella <30465823+stellaprins@users.noreply.github.com> Date: Wed, 22 Jan 2025 15:04:50 +0000 Subject: [PATCH 02/15] scale drops unit if None provided --- movement/transforms.py | 3 +- tests/test_unit/test_transforms.py | 97 ++++++++++++++++++++++-------- 2 files changed, 73 insertions(+), 27 deletions(-) diff --git a/movement/transforms.py b/movement/transforms.py index 2cc4374f..ddec71d2 100644 --- a/movement/transforms.py +++ b/movement/transforms.py @@ -10,5 +10,6 @@ def scale( scaled_data_array = data_array * factor if unit is not None: scaled_data_array.attrs["unit"] = unit - + elif unit is None: + scaled_data_array.attrs.pop("unit", None) return scaled_data_array diff --git a/tests/test_unit/test_transforms.py b/tests/test_unit/test_transforms.py index d045ea01..99d5bf57 100644 --- a/tests/test_unit/test_transforms.py +++ b/tests/test_unit/test_transforms.py @@ -14,10 +14,21 @@ def nparray_0_to_23() -> np.ndarray: @pytest.fixture def sample_data_array() -> xr.DataArray: """Turn the nparray_0_to_23 into a DataArray.""" + return data_array_with_dims_and_coords(nparray_0_to_23()) + + +def data_array_with_dims_and_coords( + data: np.ndarray, + dims: list[str] = ("time", "space"), + coords: dict[str, list[str]] = {"space": ["x", "y"]}, + **attributes: Any, +) -> xr.DataArray: + """""" return xr.DataArray( - nparray_0_to_23(), - dims=["time", "space"], - coords={"space": ["x", "y"]}, + data, + dims=dims, + coords=coords, + attrs=attributes, ) @@ -26,38 +37,32 @@ def sample_data_array() -> xr.DataArray: [ pytest.param( {}, - nparray_0_to_23(), + data_array_with_dims_and_coords(nparray_0_to_23()), id="Do nothing", ), pytest.param( {"unit": "elephants"}, - xr.DataArray( - nparray_0_to_23(), - dims=["time", "space"], - coords={"space": ["x", "y"]}, - attrs={"unit": "elephants"}, + data_array_with_dims_and_coords( + nparray_0_to_23(), unit="elephants" ), - id="Add example unit", + id="No scaling, add unit", ), pytest.param( {"factor": 2}, - nparray_0_to_23() * 2, - id="Double", + data_array_with_dims_and_coords(nparray_0_to_23() * 2), + id="Double, no unit", ), pytest.param( {"factor": 0.5}, - nparray_0_to_23() * 0.5, - id="Halve", + data_array_with_dims_and_coords(nparray_0_to_23() * 0.5), + id="Halve, no unit", ), pytest.param( {"factor": 0.5, "unit": "elephants"}, - xr.DataArray( - nparray_0_to_23() * 0.5, - dims=["time", "space"], - coords={"space": ["x", "y"]}, - attrs={"unit": "elephants"}, + data_array_with_dims_and_coords( + nparray_0_to_23() * 0.5, unit="elephants" ), - id="Halve and add example unit", + id="Halve, add unit", ), ], ) @@ -66,12 +71,52 @@ def test_scale( optional_arguments: dict[str, Any], expected_output: xr.DataArray, ): - if isinstance(expected_output, np.ndarray): - expected_output = xr.DataArray( - expected_output, - dims=["time", "space"], - coords={"space": ["x", "y"]}, - ) + expected_output = xr.DataArray( + expected_output, + dims=["time", "space"], + coords={"space": ["x", "y"]}, + ) output_data_array = scale(sample_data_array, **optional_arguments) xr.testing.assert_equal(output_data_array, expected_output) + assert output_data_array.attrs == expected_output.attrs + + +@pytest.mark.parametrize( + ["optional_arguments_1", "optional_arguments_2", "expected_output"], + [ + pytest.param( + {"factor": 2, "unit": "elephants"}, + {"factor": 0.5, "unit": "crabs"}, + data_array_with_dims_and_coords(nparray_0_to_23(), unit="crabs"), + id="No net scaling, final crabs unit", + ), + pytest.param( + {"factor": 2, "unit": "elephants"}, + {"factor": 0.5, "unit": None}, + data_array_with_dims_and_coords(nparray_0_to_23()), + id="No net scaling, no final unit", + ), + pytest.param( + {"factor": 2, "unit": None}, + {"factor": 0.5, "unit": "elephants"}, + data_array_with_dims_and_coords( + nparray_0_to_23(), unit="elephants" + ), + id="No net scaling, final elephant unit", + ), + ], +) +def test_scale_twice( + sample_data_array: xr.DataArray, + optional_arguments_1: dict[str, Any], + optional_arguments_2: dict[str, Any], + expected_output: xr.DataArray, +): + output_data_array = scale( + scale(sample_data_array, **optional_arguments_1), + **optional_arguments_2, + ) + + xr.testing.assert_equal(output_data_array, expected_output) + assert output_data_array.attrs == expected_output.attrs From b49be989bd67c65fb810d7a96fdf9fb82fab1419 Mon Sep 17 00:00:00 2001 From: Stella <30465823+stellaprins@users.noreply.github.com> Date: Wed, 22 Jan 2025 17:12:00 +0000 Subject: [PATCH 03/15] allow scaling across multiple dimensions --- movement/transforms.py | 28 +++++++++++++++++- tests/test_unit/test_transforms.py | 46 ++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 1 deletion(-) diff --git a/movement/transforms.py b/movement/transforms.py index ddec71d2..2ba66110 100644 --- a/movement/transforms.py +++ b/movement/transforms.py @@ -1,13 +1,39 @@ """Transforms module.""" +import numpy as np import xarray as xr def scale( - data_array: xr.DataArray, factor: float = 1.0, unit: str | None = None + data_array: xr.DataArray, + factor: float | np.ndarray[float] = 1.0, + unit: str | None = None, ) -> xr.DataArray: """Scale data by a given factor with an optional unit.""" + if not np.isscalar(factor): + factor = np.array(factor).squeeze() + if factor.ndim != 1: + raise ValueError( + f"Factor must be a scalar or a 1D array, got {factor.ndim}D" + ) + elif factor.shape[0] not in data_array.shape: + raise ValueError( + f"Factor shape {factor.shape} does not match " + f"the length of any data axes: {data_array.shape}" + ) + else: + # To figure out which dimension to broadcast along. + # Find dimensions with as many values as we have factors. + matching_dims = np.array(data_array.shape) == factor.shape[0] + # Find first dimension that matches. + first_matching_dim = np.argmax(matching_dims).item() + # Reshape factor to broadcast along the matching dimension. + factor_dims = [1] * data_array.ndim + factor_dims[first_matching_dim] = factor.shape[0] + # Reshape factor for broadcasting. + factor = factor.reshape(factor_dims) scaled_data_array = data_array * factor + if unit is not None: scaled_data_array.attrs["unit"] = unit elif unit is None: diff --git a/tests/test_unit/test_transforms.py b/tests/test_unit/test_transforms.py index 99d5bf57..dd063c2f 100644 --- a/tests/test_unit/test_transforms.py +++ b/tests/test_unit/test_transforms.py @@ -64,6 +64,20 @@ def data_array_with_dims_and_coords( ), id="Halve, add unit", ), + pytest.param( + {"factor": [0.5, 2]}, + data_array_with_dims_and_coords( + nparray_0_to_23() * [0.5, 2], + ), + id="x / 2, y * 2", + ), + pytest.param( + {"factor": np.array([0.5, 2]).reshape(1, 2)}, + data_array_with_dims_and_coords( + nparray_0_to_23() * [0.5, 2], + ), + id="x / 2, y * 2, should squeeze to cast across space", + ), ], ) def test_scale( @@ -82,6 +96,38 @@ def test_scale( assert output_data_array.attrs == expected_output.attrs +def test_scale_inverted_data() -> None: + factor = [0.5, 2] + transposed_data = data_array_with_dims_and_coords( + nparray_0_to_23().transpose(), dims=["space", "time"] + ) + output_array = scale(transposed_data, factor=factor) + expected_output = data_array_with_dims_and_coords( + (nparray_0_to_23() * factor).transpose(), dims=["space", "time"] + ) + xr.testing.assert_equal(output_array, expected_output) + + factor = [0.1, 0.2, 0.3, 0.4] + data_shape = (3, 5, 4, 2) + numerical_data = np.arange(np.prod(data_shape)).reshape(data_shape) + input_data = xr.DataArray(numerical_data, dims=["w", "x", "y", "z"]) + output_array = scale(input_data, factor=factor) + assert output_array.shape == input_data.shape + xr.testing.assert_equal( + output_array, input_data * np.array(factor).reshape(1, 1, 4, 1) + ) + + factor = [0.5, 1] + data_shape = (2, 2) + numerical_data = np.arange(np.prod(data_shape)).reshape(data_shape) + input_data = xr.DataArray(numerical_data, dims=["x", "y"]) + output_array = scale(input_data, factor=factor) + assert output_array.shape == input_data.shape + assert np.isclose(input_data.values[0] * 0.5, output_array.values[0]).all() + assert np.isclose(input_data.values[1], output_array.values[1]).all() + pass + + @pytest.mark.parametrize( ["optional_arguments_1", "optional_arguments_2", "expected_output"], [ From 1f6c4529f126c6a185ae7f2094a5eb33dcf53278 Mon Sep 17 00:00:00 2001 From: Stella <30465823+stellaprins@users.noreply.github.com> Date: Wed, 22 Jan 2025 18:57:30 +0000 Subject: [PATCH 04/15] add docstrings to transforms module and tests --- movement/transforms.py | 59 ++++++++++++++++++++++-------- tests/test_unit/test_transforms.py | 41 +++++++++++++-------- 2 files changed, 69 insertions(+), 31 deletions(-) diff --git a/movement/transforms.py b/movement/transforms.py index 2ba66110..e2e153d0 100644 --- a/movement/transforms.py +++ b/movement/transforms.py @@ -1,41 +1,68 @@ -"""Transforms module.""" +"""Transform and add unit attributes to xarray.DataArray datasets.""" import numpy as np import xarray as xr def scale( - data_array: xr.DataArray, + data: xr.DataArray, factor: float | np.ndarray[float] = 1.0, unit: str | None = None, ) -> xr.DataArray: - """Scale data by a given factor with an optional unit.""" + """Scale data by a given factor with an optional unit. + + Parameters + ---------- + data : xarray.DataArray + The input data to be scaled. + factor : float or np.ndarray of floats + The scaling factor to apply to the data. If factor is a scalar, all + dimensions of the data array are scaled by the same factor. If factor + is a list or an 1D array, the length of the array must match the length + of one of the data array's dimensions. The factor is broadcast + along the first matching dimension. + unit : str or None + The unit of the scaled data stored as a property in + xarray.DataArray.attrs['unit']. In case of the default (``None``) the + ``unit`` attribute is dropped. + + Returns + ------- + xarray.DataArray + The scaled data array. + + Notes + ----- + When scale is used multiple times on the same xarray.DataArray, + xarray.DataArray.attrs["unit"] is overwritten each time or is dropped if + ``None`` is passed by default or explicitly. + + When the factor is a scalar (a single number), the scaling factor is + applied to all dimensions, while if the factor is a list or array, the + factor is broadcasted along the first matching dimension. + + """ if not np.isscalar(factor): factor = np.array(factor).squeeze() if factor.ndim != 1: raise ValueError( f"Factor must be a scalar or a 1D array, got {factor.ndim}D" ) - elif factor.shape[0] not in data_array.shape: + elif factor.shape[0] not in data.shape: raise ValueError( f"Factor shape {factor.shape} does not match " - f"the length of any data axes: {data_array.shape}" + f"the length of any data axes: {data.shape}" ) else: - # To figure out which dimension to broadcast along. - # Find dimensions with as many values as we have factors. - matching_dims = np.array(data_array.shape) == factor.shape[0] - # Find first dimension that matches. + matching_dims = np.array(data.shape) == factor.shape[0] first_matching_dim = np.argmax(matching_dims).item() - # Reshape factor to broadcast along the matching dimension. - factor_dims = [1] * data_array.ndim + factor_dims = [1] * data.ndim factor_dims[first_matching_dim] = factor.shape[0] - # Reshape factor for broadcasting. factor = factor.reshape(factor_dims) - scaled_data_array = data_array * factor + scaled_data = data * factor if unit is not None: - scaled_data_array.attrs["unit"] = unit + scaled_data.attrs["unit"] = unit elif unit is None: - scaled_data_array.attrs.pop("unit", None) - return scaled_data_array + scaled_data.attrs.pop("unit", None) + return scaled_data diff --git a/tests/test_unit/test_transforms.py b/tests/test_unit/test_transforms.py index dd063c2f..be89010b 100644 --- a/tests/test_unit/test_transforms.py +++ b/tests/test_unit/test_transforms.py @@ -8,11 +8,12 @@ def nparray_0_to_23() -> np.ndarray: + """Create a 2D nparray from 0 to 23.""" return np.arange(0, 24).reshape(12, 2) @pytest.fixture -def sample_data_array() -> xr.DataArray: +def sample_data() -> xr.DataArray: """Turn the nparray_0_to_23 into a DataArray.""" return data_array_with_dims_and_coords(nparray_0_to_23()) @@ -23,7 +24,9 @@ def data_array_with_dims_and_coords( coords: dict[str, list[str]] = {"space": ["x", "y"]}, **attributes: Any, ) -> xr.DataArray: - """""" + """Create a DataArray with given data, dimensions, coordinates, and + attributes (e.g. unit or factor). + """ return xr.DataArray( data, dims=dims, @@ -81,22 +84,22 @@ def data_array_with_dims_and_coords( ], ) def test_scale( - sample_data_array: xr.DataArray, + sample_data: xr.DataArray, optional_arguments: dict[str, Any], expected_output: xr.DataArray, ): - expected_output = xr.DataArray( - expected_output, - dims=["time", "space"], - coords={"space": ["x", "y"]}, - ) + """Test scaling with different factors and units.""" + scaled_data = scale(sample_data, **optional_arguments) + xr.testing.assert_equal(scaled_data, expected_output) + assert scaled_data.attrs == expected_output.attrs - output_data_array = scale(sample_data_array, **optional_arguments) - xr.testing.assert_equal(output_data_array, expected_output) - assert output_data_array.attrs == expected_output.attrs +def test_scale_inverted_data(): + """Test scaling with transposed data along the correct dimension. -def test_scale_inverted_data() -> None: + The factor is reshaped to (1, 1, 4, 1) so that it can be broadcasted along + the third dimension ("y") which matches the length of the scaling factor. + """ factor = [0.5, 2] transposed_data = data_array_with_dims_and_coords( nparray_0_to_23().transpose(), dims=["space", "time"] @@ -117,6 +120,11 @@ def test_scale_inverted_data() -> None: output_array, input_data * np.array(factor).reshape(1, 1, 4, 1) ) + +def test_scale_first_matching_axis(): + """Test scaling when multiple axes match the scaling factor's length. + The scaling factor should be broadcasted along the first matching axis. + """ factor = [0.5, 1] data_shape = (2, 2) numerical_data = np.arange(np.prod(data_shape)).reshape(data_shape) @@ -125,7 +133,6 @@ def test_scale_inverted_data() -> None: assert output_array.shape == input_data.shape assert np.isclose(input_data.values[0] * 0.5, output_array.values[0]).all() assert np.isclose(input_data.values[1], output_array.values[1]).all() - pass @pytest.mark.parametrize( @@ -154,13 +161,17 @@ def test_scale_inverted_data() -> None: ], ) def test_scale_twice( - sample_data_array: xr.DataArray, + sample_data: xr.DataArray, optional_arguments_1: dict[str, Any], optional_arguments_2: dict[str, Any], expected_output: xr.DataArray, ): + """Test scaling when applied twice. + The second scaling operation should update the unit attribute if provided, + or remove it if None is passed explicitly or by default. + """ output_data_array = scale( - scale(sample_data_array, **optional_arguments_1), + scale(sample_data, **optional_arguments_1), **optional_arguments_2, ) From d20b474043d773d524287602695131623f680ca6 Mon Sep 17 00:00:00 2001 From: willGraham01 Date: Thu, 23 Jan 2025 16:18:20 +0000 Subject: [PATCH 05/15] Fix mypy being angry at us --- movement/transforms.py | 2 +- tests/test_unit/test_transforms.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/movement/transforms.py b/movement/transforms.py index e2e153d0..671a5191 100644 --- a/movement/transforms.py +++ b/movement/transforms.py @@ -6,7 +6,7 @@ def scale( data: xr.DataArray, - factor: float | np.ndarray[float] = 1.0, + factor: float | np.ndarray = 1.0, unit: str | None = None, ) -> xr.DataArray: """Scale data by a given factor with an optional unit. diff --git a/tests/test_unit/test_transforms.py b/tests/test_unit/test_transforms.py index be89010b..d0b56bd1 100644 --- a/tests/test_unit/test_transforms.py +++ b/tests/test_unit/test_transforms.py @@ -6,6 +6,8 @@ from movement.transforms import scale +DEFAULT_SPATIAL_COORDS = {"space": ["x", "y"]} + def nparray_0_to_23() -> np.ndarray: """Create a 2D nparray from 0 to 23.""" @@ -20,8 +22,8 @@ def sample_data() -> xr.DataArray: def data_array_with_dims_and_coords( data: np.ndarray, - dims: list[str] = ("time", "space"), - coords: dict[str, list[str]] = {"space": ["x", "y"]}, + dims: list | tuple = ("time", "space"), + coords: dict[str, list[str]] = DEFAULT_SPATIAL_COORDS, **attributes: Any, ) -> xr.DataArray: """Create a DataArray with given data, dimensions, coordinates, and From 412634bc39ad16b339ea89857c2a001a1738976b Mon Sep 17 00:00:00 2001 From: Stella <30465823+stellaprins@users.noreply.github.com> Date: Thu, 23 Jan 2025 17:59:17 +0000 Subject: [PATCH 06/15] add test_scale_value_error --- tests/test_unit/test_transforms.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/tests/test_unit/test_transforms.py b/tests/test_unit/test_transforms.py index d0b56bd1..fe80b0e8 100644 --- a/tests/test_unit/test_transforms.py +++ b/tests/test_unit/test_transforms.py @@ -176,6 +176,30 @@ def test_scale_twice( scale(sample_data, **optional_arguments_1), **optional_arguments_2, ) - xr.testing.assert_equal(output_data_array, expected_output) assert output_data_array.attrs == expected_output.attrs + + +@pytest.mark.parametrize( + "invalid_factor, expected_error_message", + [ + ( + np.zeros((3, 3, 4)), + "Factor must be a scalar or a 1D array, got 3D", + ), + ( + np.zeros(3), + "Factor shape (3,) does not match the length of" + " any data axes: (12, 2)", + ), + ], +) +def test_scale_value_error( + sample_data: xr.DataArray, + invalid_factor: np.ndarray, + expected_error_message: str, +): + """Test invalid factors raise correct error type and message.""" + with pytest.raises(ValueError) as error: + scale(sample_data, factor=invalid_factor) + assert str(error.value) == expected_error_message From 73d2a073a1d5090f163f8fb1cc36352c1b1416b0 Mon Sep 17 00:00:00 2001 From: Stella <30465823+stellaprins@users.noreply.github.com> Date: Mon, 27 Jan 2025 15:00:33 +0000 Subject: [PATCH 07/15] Use ArrayLike typehint for factor and rename unit to space_unit --- movement/transforms.py | 39 ++++++++++++----------- tests/test_unit/test_transforms.py | 51 ++++++++++++++++-------------- 2 files changed, 48 insertions(+), 42 deletions(-) diff --git a/movement/transforms.py b/movement/transforms.py index 671a5191..02d71cfa 100644 --- a/movement/transforms.py +++ b/movement/transforms.py @@ -2,12 +2,13 @@ import numpy as np import xarray as xr +from numpy.typing import ArrayLike def scale( data: xr.DataArray, - factor: float | np.ndarray = 1.0, - unit: str | None = None, + factor: ArrayLike = 1.0, + space_unit: str | None = None, ) -> xr.DataArray: """Scale data by a given factor with an optional unit. @@ -15,16 +16,17 @@ def scale( ---------- data : xarray.DataArray The input data to be scaled. - factor : float or np.ndarray of floats - The scaling factor to apply to the data. If factor is a scalar, all - dimensions of the data array are scaled by the same factor. If factor - is a list or an 1D array, the length of the array must match the length - of one of the data array's dimensions. The factor is broadcast - along the first matching dimension. - unit : str or None + factor : ArrayLike + The scaling factor to apply to the data. Any object that can be + converted to a 1D numpy array is valid (e.g. a single float or a list + of floats). If factor is a single float, the data array is uniformly + scaled by the same factor. If factor contains multiple floats, the + length of the resulting array must match the length of data array's + unit dimension along which it will be broadcasted. + space_unit : str or None The unit of the scaled data stored as a property in - xarray.DataArray.attrs['unit']. In case of the default (``None``) the - ``unit`` attribute is dropped. + xarray.DataArray.attrs['space_unit']. In case of the default (``None``) + the ``space_unit`` attribute is dropped. Returns ------- @@ -34,8 +36,8 @@ def scale( Notes ----- When scale is used multiple times on the same xarray.DataArray, - xarray.DataArray.attrs["unit"] is overwritten each time or is dropped if - ``None`` is passed by default or explicitly. + xarray.DataArray.attrs["space_unit"] is overwritten each time or is dropped + if ``None`` is passed by default or explicitly. When the factor is a scalar (a single number), the scaling factor is applied to all dimensions, while if the factor is a list or array, the @@ -46,7 +48,8 @@ def scale( factor = np.array(factor).squeeze() if factor.ndim != 1: raise ValueError( - f"Factor must be a scalar or a 1D array, got {factor.ndim}D" + "Factor must be an object that can be converted to a 1D numpy" + f" array, got {factor.ndim}D" ) elif factor.shape[0] not in data.shape: raise ValueError( @@ -61,8 +64,8 @@ def scale( factor = factor.reshape(factor_dims) scaled_data = data * factor - if unit is not None: - scaled_data.attrs["unit"] = unit - elif unit is None: - scaled_data.attrs.pop("unit", None) + if space_unit is not None: + scaled_data.attrs["space_unit"] = space_unit + elif space_unit is None: + scaled_data.attrs.pop("space_unit", None) return scaled_data diff --git a/tests/test_unit/test_transforms.py b/tests/test_unit/test_transforms.py index fe80b0e8..b0ea6717 100644 --- a/tests/test_unit/test_transforms.py +++ b/tests/test_unit/test_transforms.py @@ -27,7 +27,7 @@ def data_array_with_dims_and_coords( **attributes: Any, ) -> xr.DataArray: """Create a DataArray with given data, dimensions, coordinates, and - attributes (e.g. unit or factor). + attributes (e.g. space_unit or factor). """ return xr.DataArray( data, @@ -46,28 +46,28 @@ def data_array_with_dims_and_coords( id="Do nothing", ), pytest.param( - {"unit": "elephants"}, + {"space_unit": "elephants"}, data_array_with_dims_and_coords( - nparray_0_to_23(), unit="elephants" + nparray_0_to_23(), space_unit="elephants" ), - id="No scaling, add unit", + id="No scaling, add space_unit", ), pytest.param( {"factor": 2}, data_array_with_dims_and_coords(nparray_0_to_23() * 2), - id="Double, no unit", + id="Double, no space_unit", ), pytest.param( {"factor": 0.5}, data_array_with_dims_and_coords(nparray_0_to_23() * 0.5), - id="Halve, no unit", + id="Halve, no space_unit", ), pytest.param( - {"factor": 0.5, "unit": "elephants"}, + {"factor": 0.5, "space_unit": "elephants"}, data_array_with_dims_and_coords( - nparray_0_to_23() * 0.5, unit="elephants" + nparray_0_to_23() * 0.5, space_unit="elephants" ), - id="Halve, add unit", + id="Halve, add space_unit", ), pytest.param( {"factor": [0.5, 2]}, @@ -90,7 +90,7 @@ def test_scale( optional_arguments: dict[str, Any], expected_output: xr.DataArray, ): - """Test scaling with different factors and units.""" + """Test scaling with different factors and space_units.""" scaled_data = scale(sample_data, **optional_arguments) xr.testing.assert_equal(scaled_data, expected_output) assert scaled_data.attrs == expected_output.attrs @@ -141,24 +141,26 @@ def test_scale_first_matching_axis(): ["optional_arguments_1", "optional_arguments_2", "expected_output"], [ pytest.param( - {"factor": 2, "unit": "elephants"}, - {"factor": 0.5, "unit": "crabs"}, - data_array_with_dims_and_coords(nparray_0_to_23(), unit="crabs"), - id="No net scaling, final crabs unit", + {"factor": 2, "space_unit": "elephants"}, + {"factor": 0.5, "space_unit": "crabs"}, + data_array_with_dims_and_coords( + nparray_0_to_23(), space_unit="crabs" + ), + id="No net scaling, final crabs space_unit", ), pytest.param( - {"factor": 2, "unit": "elephants"}, - {"factor": 0.5, "unit": None}, + {"factor": 2, "space_unit": "elephants"}, + {"factor": 0.5, "space_unit": None}, data_array_with_dims_and_coords(nparray_0_to_23()), - id="No net scaling, no final unit", + id="No net scaling, no final space_unit", ), pytest.param( - {"factor": 2, "unit": None}, - {"factor": 0.5, "unit": "elephants"}, + {"factor": 2, "space_unit": None}, + {"factor": 0.5, "space_unit": "elephants"}, data_array_with_dims_and_coords( - nparray_0_to_23(), unit="elephants" + nparray_0_to_23(), space_unit="elephants" ), - id="No net scaling, final elephant unit", + id="No net scaling, final elephant space_unit", ), ], ) @@ -169,8 +171,8 @@ def test_scale_twice( expected_output: xr.DataArray, ): """Test scaling when applied twice. - The second scaling operation should update the unit attribute if provided, - or remove it if None is passed explicitly or by default. + The second scaling operation should update the space_unit attribute if + provided, or remove it if None is passed explicitly or by default. """ output_data_array = scale( scale(sample_data, **optional_arguments_1), @@ -185,7 +187,8 @@ def test_scale_twice( [ ( np.zeros((3, 3, 4)), - "Factor must be a scalar or a 1D array, got 3D", + "Factor must be an object that can be converted to a 1D numpy" + " array, got 3D", ), ( np.zeros(3), From 515e19ccf40e67eb17f832c3f3aab5ac50460592 Mon Sep 17 00:00:00 2001 From: Stella <30465823+stellaprins@users.noreply.github.com> Date: Mon, 27 Jan 2025 17:22:18 +0000 Subject: [PATCH 08/15] ensure broadcasting always happens along the space dimension --- movement/transforms.py | 12 +++--- tests/test_unit/test_transforms.py | 66 ++++++++++++++++-------------- 2 files changed, 41 insertions(+), 37 deletions(-) diff --git a/movement/transforms.py b/movement/transforms.py index 02d71cfa..aa6ec19e 100644 --- a/movement/transforms.py +++ b/movement/transforms.py @@ -51,16 +51,14 @@ def scale( "Factor must be an object that can be converted to a 1D numpy" f" array, got {factor.ndim}D" ) - elif factor.shape[0] not in data.shape: + elif factor.shape[0] != data.space.values.shape[0]: raise ValueError( - f"Factor shape {factor.shape} does not match " - f"the length of any data axes: {data.shape}" + f"Factor length {factor.shape[0]} does not match the length " + f"of the space dimension {data.space.values.shape[0]}" ) else: - matching_dims = np.array(data.shape) == factor.shape[0] - first_matching_dim = np.argmax(matching_dims).item() - factor_dims = [1] * data.ndim - factor_dims[first_matching_dim] = factor.shape[0] + factor_dims = [1] * data.ndim # 1s array matching data dimensions + factor_dims[data.get_axis_num("space")] = factor.shape[0] factor = factor.reshape(factor_dims) scaled_data = data * factor diff --git a/tests/test_unit/test_transforms.py b/tests/test_unit/test_transforms.py index b0ea6717..46ec5512 100644 --- a/tests/test_unit/test_transforms.py +++ b/tests/test_unit/test_transforms.py @@ -96,45 +96,49 @@ def test_scale( assert scaled_data.attrs == expected_output.attrs -def test_scale_inverted_data(): +def test_scale_space_dimension_two_dims(): """Test scaling with transposed data along the correct dimension. - The factor is reshaped to (1, 1, 4, 1) so that it can be broadcasted along - the third dimension ("y") which matches the length of the scaling factor. + The scaling factor should be broadcasted along the space axis irrespective + of the order of the dimensions in the input data. """ factor = [0.5, 2] - transposed_data = data_array_with_dims_and_coords( - nparray_0_to_23().transpose(), dims=["space", "time"] + + data_space_second = data_array_with_dims_and_coords( + nparray_0_to_23(), dims=["time", "space"] ) - output_array = scale(transposed_data, factor=factor) - expected_output = data_array_with_dims_and_coords( - (nparray_0_to_23() * factor).transpose(), dims=["space", "time"] + data_space_first = data_array_with_dims_and_coords( + nparray_0_to_23().transpose(), dims=["space", "time"] ) - xr.testing.assert_equal(output_array, expected_output) - factor = [0.1, 0.2, 0.3, 0.4] - data_shape = (3, 5, 4, 2) - numerical_data = np.arange(np.prod(data_shape)).reshape(data_shape) - input_data = xr.DataArray(numerical_data, dims=["w", "x", "y", "z"]) - output_array = scale(input_data, factor=factor) - assert output_array.shape == input_data.shape + scaled_data_space_second = scale(data_space_second, factor=factor) + scaled_data_space_first = scale(data_space_first, factor=factor) + xr.testing.assert_equal( - output_array, input_data * np.array(factor).reshape(1, 1, 4, 1) + scaled_data_space_second, scaled_data_space_first.transpose() ) -def test_scale_first_matching_axis(): - """Test scaling when multiple axes match the scaling factor's length. - The scaling factor should be broadcasted along the first matching axis. +def test_scale_space_dimension_four_dims(): + """Test scaling with data having four dimensions. + + The scaling factor should be broadcasted along the space axis irrespective + of the order of the dimensions in the input data. """ - factor = [0.5, 1] - data_shape = (2, 2) + factor = [0.5, 2] + data_shape = (3, 6, 4, 2) numerical_data = np.arange(np.prod(data_shape)).reshape(data_shape) - input_data = xr.DataArray(numerical_data, dims=["x", "y"]) - output_array = scale(input_data, factor=factor) - assert output_array.shape == input_data.shape - assert np.isclose(input_data.values[0] * 0.5, output_array.values[0]).all() - assert np.isclose(input_data.values[1], output_array.values[1]).all() + data_space_fourth = xr.DataArray( + numerical_data, dims=["time", "individuals", "keypoints", "space"] + ) + scaled_data_space_fourth = scale(data_space_fourth, factor=factor) + + assert scaled_data_space_fourth.shape == data_space_fourth.shape + + expected_output_data = data_space_fourth * np.array(factor).reshape( + 1, 1, 1, 2 + ) + xr.testing.assert_equal(scaled_data_space_fourth, expected_output_data) @pytest.mark.parametrize( @@ -185,15 +189,17 @@ def test_scale_twice( @pytest.mark.parametrize( "invalid_factor, expected_error_message", [ - ( + pytest.param( np.zeros((3, 3, 4)), "Factor must be an object that can be converted to a 1D numpy" " array, got 3D", + id="3D factor", ), - ( + pytest.param( np.zeros(3), - "Factor shape (3,) does not match the length of" - " any data axes: (12, 2)", + "Factor length 3 does not match the length " + "of the space dimension 2", + id="space dimension mismatch", ), ], ) From 362196fcdd66554d8648770876bebdc05c34d3aa Mon Sep 17 00:00:00 2001 From: Stella <30465823+stellaprins@users.noreply.github.com> Date: Mon, 27 Jan 2025 17:52:37 +0000 Subject: [PATCH 09/15] parametrize and refactor test_scale_space_dimension --- tests/test_unit/test_transforms.py | 51 ++++++++++-------------------- 1 file changed, 16 insertions(+), 35 deletions(-) diff --git a/tests/test_unit/test_transforms.py b/tests/test_unit/test_transforms.py index 46ec5512..e27fb3fc 100644 --- a/tests/test_unit/test_transforms.py +++ b/tests/test_unit/test_transforms.py @@ -96,49 +96,30 @@ def test_scale( assert scaled_data.attrs == expected_output.attrs -def test_scale_space_dimension_two_dims(): +@pytest.mark.parametrize( + "dims, data_shape", + [ + (["time", "space"], (3, 2)), + (["space", "time"], (2, 3)), + (["time", "individuals", "keypoints", "space"], (3, 6, 4, 2)), + ], + ids=["time-space", "space-time", "time-individuals-keypoints-space"], +) +def test_scale_space_dimension(dims: list[str], data_shape): """Test scaling with transposed data along the correct dimension. The scaling factor should be broadcasted along the space axis irrespective of the order of the dimensions in the input data. """ factor = [0.5, 2] - - data_space_second = data_array_with_dims_and_coords( - nparray_0_to_23(), dims=["time", "space"] - ) - data_space_first = data_array_with_dims_and_coords( - nparray_0_to_23().transpose(), dims=["space", "time"] - ) - - scaled_data_space_second = scale(data_space_second, factor=factor) - scaled_data_space_first = scale(data_space_first, factor=factor) - - xr.testing.assert_equal( - scaled_data_space_second, scaled_data_space_first.transpose() - ) - - -def test_scale_space_dimension_four_dims(): - """Test scaling with data having four dimensions. - - The scaling factor should be broadcasted along the space axis irrespective - of the order of the dimensions in the input data. - """ - factor = [0.5, 2] - data_shape = (3, 6, 4, 2) numerical_data = np.arange(np.prod(data_shape)).reshape(data_shape) - data_space_fourth = xr.DataArray( - numerical_data, dims=["time", "individuals", "keypoints", "space"] - ) - scaled_data_space_fourth = scale(data_space_fourth, factor=factor) + data = xr.DataArray(numerical_data, dims=dims) + scaled_data = scale(data, factor=factor) + broadcast_list = [1 if dim != "space" else len(factor) for dim in dims] + expected_output_data = data * np.array(factor).reshape(broadcast_list) - assert scaled_data_space_fourth.shape == data_space_fourth.shape - - expected_output_data = data_space_fourth * np.array(factor).reshape( - 1, 1, 1, 2 - ) - xr.testing.assert_equal(scaled_data_space_fourth, expected_output_data) + assert scaled_data.shape == data.shape + xr.testing.assert_equal(scaled_data, expected_output_data) @pytest.mark.parametrize( From 9fd47f578937d679b263ced1b76838b1d821387c Mon Sep 17 00:00:00 2001 From: Stella <30465823+stellaprins@users.noreply.github.com> Date: Mon, 27 Jan 2025 18:15:00 +0000 Subject: [PATCH 10/15] add validate_dims_coords to scale function and compare space shape to factor shape (instead of length) --- movement/transforms.py | 10 +++++++--- tests/test_unit/test_transforms.py | 4 +++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/movement/transforms.py b/movement/transforms.py index aa6ec19e..c11290b7 100644 --- a/movement/transforms.py +++ b/movement/transforms.py @@ -4,6 +4,8 @@ import xarray as xr from numpy.typing import ArrayLike +from movement.validators.arrays import validate_dims_coords + def scale( data: xr.DataArray, @@ -44,6 +46,8 @@ def scale( factor is broadcasted along the first matching dimension. """ + validate_dims_coords(data, {"space": ["x", "y"]}) + if not np.isscalar(factor): factor = np.array(factor).squeeze() if factor.ndim != 1: @@ -51,10 +55,10 @@ def scale( "Factor must be an object that can be converted to a 1D numpy" f" array, got {factor.ndim}D" ) - elif factor.shape[0] != data.space.values.shape[0]: + elif factor.shape != data.space.values.shape: raise ValueError( - f"Factor length {factor.shape[0]} does not match the length " - f"of the space dimension {data.space.values.shape[0]}" + f"Factor shape {factor.shape} does not match the shape " + f"of the space dimension {data.space.values.shape}" ) else: factor_dims = [1] * data.ndim # 1s array matching data dimensions diff --git a/tests/test_unit/test_transforms.py b/tests/test_unit/test_transforms.py index e27fb3fc..34b0610c 100644 --- a/tests/test_unit/test_transforms.py +++ b/tests/test_unit/test_transforms.py @@ -113,7 +113,9 @@ def test_scale_space_dimension(dims: list[str], data_shape): """ factor = [0.5, 2] numerical_data = np.arange(np.prod(data_shape)).reshape(data_shape) - data = xr.DataArray(numerical_data, dims=dims) + data = xr.DataArray( + numerical_data, dims=dims, coords=DEFAULT_SPATIAL_COORDS + ) scaled_data = scale(data, factor=factor) broadcast_list = [1 if dim != "space" else len(factor) for dim in dims] expected_output_data = data * np.array(factor).reshape(broadcast_list) From 14c31a5d110593716715e4bacf8932fd52df2e5a Mon Sep 17 00:00:00 2001 From: Stella <30465823+stellaprins@users.noreply.github.com> Date: Mon, 27 Jan 2025 18:26:51 +0000 Subject: [PATCH 11/15] add test case with number of spatial dimensions equal to the number of timepoints, individuals, and keypoints --- tests/test_unit/test_transforms.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tests/test_unit/test_transforms.py b/tests/test_unit/test_transforms.py index 34b0610c..89cb0cf3 100644 --- a/tests/test_unit/test_transforms.py +++ b/tests/test_unit/test_transforms.py @@ -102,8 +102,14 @@ def test_scale( (["time", "space"], (3, 2)), (["space", "time"], (2, 3)), (["time", "individuals", "keypoints", "space"], (3, 6, 4, 2)), + (["time", "individuals", "keypoints", "space"], (2, 2, 2, 2)), + ], + ids=[ + "time-space", + "space-time", + "time-individuals-keypoints-space", + "2x2x2x2", ], - ids=["time-space", "space-time", "time-individuals-keypoints-space"], ) def test_scale_space_dimension(dims: list[str], data_shape): """Test scaling with transposed data along the correct dimension. @@ -180,8 +186,8 @@ def test_scale_twice( ), pytest.param( np.zeros(3), - "Factor length 3 does not match the length " - "of the space dimension 2", + "Factor shape (3,) does not match the shape " + "of the space dimension (2,)", id="space dimension mismatch", ), ], From fea92306cc6debe54eec4a2ee9c20602e6fe90e6 Mon Sep 17 00:00:00 2001 From: Stella <30465823+stellaprins@users.noreply.github.com> Date: Tue, 28 Jan 2025 11:10:55 +0000 Subject: [PATCH 12/15] add float to scale typing for factor --- movement/transforms.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/movement/transforms.py b/movement/transforms.py index c11290b7..61d11afe 100644 --- a/movement/transforms.py +++ b/movement/transforms.py @@ -9,7 +9,7 @@ def scale( data: xr.DataArray, - factor: ArrayLike = 1.0, + factor: ArrayLike | float = 1.0, space_unit: str | None = None, ) -> xr.DataArray: """Scale data by a given factor with an optional unit. @@ -18,13 +18,13 @@ def scale( ---------- data : xarray.DataArray The input data to be scaled. - factor : ArrayLike - The scaling factor to apply to the data. Any object that can be - converted to a 1D numpy array is valid (e.g. a single float or a list - of floats). If factor is a single float, the data array is uniformly - scaled by the same factor. If factor contains multiple floats, the - length of the resulting array must match the length of data array's - unit dimension along which it will be broadcasted. + factor : ArrayLike or float + The scaling factor to apply to the data. If factor is a scalar (a + single float), the data array is uniformly scaled by the same factor. + If factor is an object that can be converted to a 1D numpy array (e.g. + a list of floats), the length of the resulting array must match the + length of data array's space dimension along which it will be + broadcasted. space_unit : str or None The unit of the scaled data stored as a property in xarray.DataArray.attrs['space_unit']. In case of the default (``None``) From 7d5423dcb4f76e4f36fcf8220f4c0cbdebbb71b5 Mon Sep 17 00:00:00 2001 From: Stella <30465823+stellaprins@users.noreply.github.com> Date: Thu, 30 Jan 2025 09:54:06 +0000 Subject: [PATCH 13/15] add 3D space coords validation and test --- movement/transforms.py | 9 ++-- tests/test_unit/test_transforms.py | 69 ++++++++++++++++++++++++------ 2 files changed, 61 insertions(+), 17 deletions(-) diff --git a/movement/transforms.py b/movement/transforms.py index 61d11afe..62435368 100644 --- a/movement/transforms.py +++ b/movement/transforms.py @@ -41,11 +41,12 @@ def scale( xarray.DataArray.attrs["space_unit"] is overwritten each time or is dropped if ``None`` is passed by default or explicitly. - When the factor is a scalar (a single number), the scaling factor is - applied to all dimensions, while if the factor is a list or array, the - factor is broadcasted along the first matching dimension. - """ + if data.space.ndim == 2: + validate_dims_coords(data, {"space": ["x", "y"]}) + else: + validate_dims_coords(data, {"space": ["x", "y", "z"]}) + validate_dims_coords(data, {"space": ["x", "y"]}) if not np.isscalar(factor): diff --git a/tests/test_unit/test_transforms.py b/tests/test_unit/test_transforms.py index 89cb0cf3..d2182352 100644 --- a/tests/test_unit/test_transforms.py +++ b/tests/test_unit/test_transforms.py @@ -6,7 +6,8 @@ from movement.transforms import scale -DEFAULT_SPATIAL_COORDS = {"space": ["x", "y"]} +SPATIAL_COORDS_2D = {"space": ["x", "y"]} +SPATIAL_COORDS_3D = {"space": ["x", "y", "z"]} def nparray_0_to_23() -> np.ndarray: @@ -15,19 +16,29 @@ def nparray_0_to_23() -> np.ndarray: @pytest.fixture -def sample_data() -> xr.DataArray: +def sample_data_2d() -> xr.DataArray: """Turn the nparray_0_to_23 into a DataArray.""" return data_array_with_dims_and_coords(nparray_0_to_23()) +@pytest.fixture +def sample_data_3d() -> xr.DataArray: + """Turn the nparray_0_to_23 into a DataArray with 3D space.""" + return data_array_with_dims_and_coords( + nparray_0_to_23().reshape(8, 3), + coords=SPATIAL_COORDS_3D, + ) + + def data_array_with_dims_and_coords( data: np.ndarray, dims: list | tuple = ("time", "space"), - coords: dict[str, list[str]] = DEFAULT_SPATIAL_COORDS, + coords: dict[str, list[str]] = SPATIAL_COORDS_2D, **attributes: Any, ) -> xr.DataArray: """Create a DataArray with given data, dimensions, coordinates, and - attributes (e.g. space_unit or factor). + attributes (e.g. space_unit or factor). The default space coordinates + are x and y (2D). """ return xr.DataArray( data, @@ -86,12 +97,12 @@ def data_array_with_dims_and_coords( ], ) def test_scale( - sample_data: xr.DataArray, + sample_data_2d: xr.DataArray, optional_arguments: dict[str, Any], expected_output: xr.DataArray, ): """Test scaling with different factors and space_units.""" - scaled_data = scale(sample_data, **optional_arguments) + scaled_data = scale(sample_data_2d, **optional_arguments) xr.testing.assert_equal(scaled_data, expected_output) assert scaled_data.attrs == expected_output.attrs @@ -119,9 +130,7 @@ def test_scale_space_dimension(dims: list[str], data_shape): """ factor = [0.5, 2] numerical_data = np.arange(np.prod(data_shape)).reshape(data_shape) - data = xr.DataArray( - numerical_data, dims=dims, coords=DEFAULT_SPATIAL_COORDS - ) + data = xr.DataArray(numerical_data, dims=dims, coords=SPATIAL_COORDS_2D) scaled_data = scale(data, factor=factor) broadcast_list = [1 if dim != "space" else len(factor) for dim in dims] expected_output_data = data * np.array(factor).reshape(broadcast_list) @@ -158,7 +167,7 @@ def test_scale_space_dimension(dims: list[str], data_shape): ], ) def test_scale_twice( - sample_data: xr.DataArray, + sample_data_2d: xr.DataArray, optional_arguments_1: dict[str, Any], optional_arguments_2: dict[str, Any], expected_output: xr.DataArray, @@ -168,7 +177,7 @@ def test_scale_twice( provided, or remove it if None is passed explicitly or by default. """ output_data_array = scale( - scale(sample_data, **optional_arguments_1), + scale(sample_data_2d, **optional_arguments_1), **optional_arguments_2, ) xr.testing.assert_equal(output_data_array, expected_output) @@ -193,11 +202,45 @@ def test_scale_twice( ], ) def test_scale_value_error( - sample_data: xr.DataArray, + sample_data_2d: xr.DataArray, invalid_factor: np.ndarray, expected_error_message: str, ): """Test invalid factors raise correct error type and message.""" with pytest.raises(ValueError) as error: - scale(sample_data, factor=invalid_factor) + scale(sample_data_2d, factor=invalid_factor) assert str(error.value) == expected_error_message + + +@pytest.mark.parametrize( + "factor", + [2, [1, 2, 0.5]], + ids=["uniform scaling", "multi-axis scaling"], +) +def test_scale_3d_space(factor, sample_data_3d: xr.DataArray): + """Test scaling a DataArray with 3D space.""" + scaled_data = scale(sample_data_3d, factor=factor) + expected_output = data_array_with_dims_and_coords( + nparray_0_to_23().reshape(8, 3) * np.array(factor).reshape(1, -1), + coords=SPATIAL_COORDS_3D, + ) + xr.testing.assert_equal(scaled_data, expected_output) + + +@pytest.mark.parametrize( + "factor", + [2, [1, 2, 0.5]], + ids=["uniform scaling", "multi-axis scaling"], +) +def test_scale_invalid_3d_space(factor): + """Test scaling a DataArray with 3D space.""" + invalid_coords = {"space": ["x", "flubble", "y"]} # "z" is missing + invalid_sample_data_3d = data_array_with_dims_and_coords( + nparray_0_to_23().reshape(8, 3), + coords=invalid_coords, + ) + with pytest.raises(ValueError) as error: + scale(invalid_sample_data_3d, factor=factor) + assert str(error.value) == ( + "Input data must contain ['z'] in the 'space' coordinates." + ) From 3da8cd1fb4605403c8af5084f406dfd973a9bfb5 Mon Sep 17 00:00:00 2001 From: Stella <30465823+stellaprins@users.noreply.github.com> Date: Thu, 30 Jan 2025 09:57:22 +0000 Subject: [PATCH 14/15] Revert "add 3D space coords validation and test" This reverts commit 7d5423dcb4f76e4f36fcf8220f4c0cbdebbb71b5. --- movement/transforms.py | 9 ++-- tests/test_unit/test_transforms.py | 69 ++++++------------------------ 2 files changed, 17 insertions(+), 61 deletions(-) diff --git a/movement/transforms.py b/movement/transforms.py index 62435368..61d11afe 100644 --- a/movement/transforms.py +++ b/movement/transforms.py @@ -41,12 +41,11 @@ def scale( xarray.DataArray.attrs["space_unit"] is overwritten each time or is dropped if ``None`` is passed by default or explicitly. - """ - if data.space.ndim == 2: - validate_dims_coords(data, {"space": ["x", "y"]}) - else: - validate_dims_coords(data, {"space": ["x", "y", "z"]}) + When the factor is a scalar (a single number), the scaling factor is + applied to all dimensions, while if the factor is a list or array, the + factor is broadcasted along the first matching dimension. + """ validate_dims_coords(data, {"space": ["x", "y"]}) if not np.isscalar(factor): diff --git a/tests/test_unit/test_transforms.py b/tests/test_unit/test_transforms.py index d2182352..89cb0cf3 100644 --- a/tests/test_unit/test_transforms.py +++ b/tests/test_unit/test_transforms.py @@ -6,8 +6,7 @@ from movement.transforms import scale -SPATIAL_COORDS_2D = {"space": ["x", "y"]} -SPATIAL_COORDS_3D = {"space": ["x", "y", "z"]} +DEFAULT_SPATIAL_COORDS = {"space": ["x", "y"]} def nparray_0_to_23() -> np.ndarray: @@ -16,29 +15,19 @@ def nparray_0_to_23() -> np.ndarray: @pytest.fixture -def sample_data_2d() -> xr.DataArray: +def sample_data() -> xr.DataArray: """Turn the nparray_0_to_23 into a DataArray.""" return data_array_with_dims_and_coords(nparray_0_to_23()) -@pytest.fixture -def sample_data_3d() -> xr.DataArray: - """Turn the nparray_0_to_23 into a DataArray with 3D space.""" - return data_array_with_dims_and_coords( - nparray_0_to_23().reshape(8, 3), - coords=SPATIAL_COORDS_3D, - ) - - def data_array_with_dims_and_coords( data: np.ndarray, dims: list | tuple = ("time", "space"), - coords: dict[str, list[str]] = SPATIAL_COORDS_2D, + coords: dict[str, list[str]] = DEFAULT_SPATIAL_COORDS, **attributes: Any, ) -> xr.DataArray: """Create a DataArray with given data, dimensions, coordinates, and - attributes (e.g. space_unit or factor). The default space coordinates - are x and y (2D). + attributes (e.g. space_unit or factor). """ return xr.DataArray( data, @@ -97,12 +86,12 @@ def data_array_with_dims_and_coords( ], ) def test_scale( - sample_data_2d: xr.DataArray, + sample_data: xr.DataArray, optional_arguments: dict[str, Any], expected_output: xr.DataArray, ): """Test scaling with different factors and space_units.""" - scaled_data = scale(sample_data_2d, **optional_arguments) + scaled_data = scale(sample_data, **optional_arguments) xr.testing.assert_equal(scaled_data, expected_output) assert scaled_data.attrs == expected_output.attrs @@ -130,7 +119,9 @@ def test_scale_space_dimension(dims: list[str], data_shape): """ factor = [0.5, 2] numerical_data = np.arange(np.prod(data_shape)).reshape(data_shape) - data = xr.DataArray(numerical_data, dims=dims, coords=SPATIAL_COORDS_2D) + data = xr.DataArray( + numerical_data, dims=dims, coords=DEFAULT_SPATIAL_COORDS + ) scaled_data = scale(data, factor=factor) broadcast_list = [1 if dim != "space" else len(factor) for dim in dims] expected_output_data = data * np.array(factor).reshape(broadcast_list) @@ -167,7 +158,7 @@ def test_scale_space_dimension(dims: list[str], data_shape): ], ) def test_scale_twice( - sample_data_2d: xr.DataArray, + sample_data: xr.DataArray, optional_arguments_1: dict[str, Any], optional_arguments_2: dict[str, Any], expected_output: xr.DataArray, @@ -177,7 +168,7 @@ def test_scale_twice( provided, or remove it if None is passed explicitly or by default. """ output_data_array = scale( - scale(sample_data_2d, **optional_arguments_1), + scale(sample_data, **optional_arguments_1), **optional_arguments_2, ) xr.testing.assert_equal(output_data_array, expected_output) @@ -202,45 +193,11 @@ def test_scale_twice( ], ) def test_scale_value_error( - sample_data_2d: xr.DataArray, + sample_data: xr.DataArray, invalid_factor: np.ndarray, expected_error_message: str, ): """Test invalid factors raise correct error type and message.""" with pytest.raises(ValueError) as error: - scale(sample_data_2d, factor=invalid_factor) + scale(sample_data, factor=invalid_factor) assert str(error.value) == expected_error_message - - -@pytest.mark.parametrize( - "factor", - [2, [1, 2, 0.5]], - ids=["uniform scaling", "multi-axis scaling"], -) -def test_scale_3d_space(factor, sample_data_3d: xr.DataArray): - """Test scaling a DataArray with 3D space.""" - scaled_data = scale(sample_data_3d, factor=factor) - expected_output = data_array_with_dims_and_coords( - nparray_0_to_23().reshape(8, 3) * np.array(factor).reshape(1, -1), - coords=SPATIAL_COORDS_3D, - ) - xr.testing.assert_equal(scaled_data, expected_output) - - -@pytest.mark.parametrize( - "factor", - [2, [1, 2, 0.5]], - ids=["uniform scaling", "multi-axis scaling"], -) -def test_scale_invalid_3d_space(factor): - """Test scaling a DataArray with 3D space.""" - invalid_coords = {"space": ["x", "flubble", "y"]} # "z" is missing - invalid_sample_data_3d = data_array_with_dims_and_coords( - nparray_0_to_23().reshape(8, 3), - coords=invalid_coords, - ) - with pytest.raises(ValueError) as error: - scale(invalid_sample_data_3d, factor=factor) - assert str(error.value) == ( - "Input data must contain ['z'] in the 'space' coordinates." - ) From 95f6d7e23202aa2b967cc6d11fb268f4ba599b5b Mon Sep 17 00:00:00 2001 From: Stella <30465823+stellaprins@users.noreply.github.com> Date: Thu, 30 Jan 2025 10:26:23 +0000 Subject: [PATCH 15/15] add 3D space coords validation and test --- movement/transforms.py | 9 ++-- tests/test_unit/test_transforms.py | 74 +++++++++++++++++++++++------- 2 files changed, 61 insertions(+), 22 deletions(-) diff --git a/movement/transforms.py b/movement/transforms.py index 61d11afe..02fcd44c 100644 --- a/movement/transforms.py +++ b/movement/transforms.py @@ -41,12 +41,11 @@ def scale( xarray.DataArray.attrs["space_unit"] is overwritten each time or is dropped if ``None`` is passed by default or explicitly. - When the factor is a scalar (a single number), the scaling factor is - applied to all dimensions, while if the factor is a list or array, the - factor is broadcasted along the first matching dimension. - """ - validate_dims_coords(data, {"space": ["x", "y"]}) + if len(data.coords["space"]) == 2: + validate_dims_coords(data, {"space": ["x", "y"]}) + else: + validate_dims_coords(data, {"space": ["x", "y", "z"]}) if not np.isscalar(factor): factor = np.array(factor).squeeze() diff --git a/tests/test_unit/test_transforms.py b/tests/test_unit/test_transforms.py index 89cb0cf3..13fb9b51 100644 --- a/tests/test_unit/test_transforms.py +++ b/tests/test_unit/test_transforms.py @@ -6,7 +6,8 @@ from movement.transforms import scale -DEFAULT_SPATIAL_COORDS = {"space": ["x", "y"]} +SPATIAL_COORDS_2D = {"space": ["x", "y"]} +SPATIAL_COORDS_3D = {"space": ["x", "y", "z"]} def nparray_0_to_23() -> np.ndarray: @@ -14,16 +15,10 @@ def nparray_0_to_23() -> np.ndarray: return np.arange(0, 24).reshape(12, 2) -@pytest.fixture -def sample_data() -> xr.DataArray: - """Turn the nparray_0_to_23 into a DataArray.""" - return data_array_with_dims_and_coords(nparray_0_to_23()) - - def data_array_with_dims_and_coords( data: np.ndarray, dims: list | tuple = ("time", "space"), - coords: dict[str, list[str]] = DEFAULT_SPATIAL_COORDS, + coords: dict[str, list[str]] = SPATIAL_COORDS_2D, **attributes: Any, ) -> xr.DataArray: """Create a DataArray with given data, dimensions, coordinates, and @@ -37,6 +32,21 @@ def data_array_with_dims_and_coords( ) +@pytest.fixture +def sample_data_2d() -> xr.DataArray: + """Turn the nparray_0_to_23 into a DataArray.""" + return data_array_with_dims_and_coords(nparray_0_to_23()) + + +@pytest.fixture +def sample_data_3d() -> xr.DataArray: + """Turn the nparray_0_to_23 into a DataArray with 3D space.""" + return data_array_with_dims_and_coords( + nparray_0_to_23().reshape(8, 3), + coords=SPATIAL_COORDS_3D, + ) + + @pytest.mark.parametrize( ["optional_arguments", "expected_output"], [ @@ -86,12 +96,12 @@ def data_array_with_dims_and_coords( ], ) def test_scale( - sample_data: xr.DataArray, + sample_data_2d: xr.DataArray, optional_arguments: dict[str, Any], expected_output: xr.DataArray, ): """Test scaling with different factors and space_units.""" - scaled_data = scale(sample_data, **optional_arguments) + scaled_data = scale(sample_data_2d, **optional_arguments) xr.testing.assert_equal(scaled_data, expected_output) assert scaled_data.attrs == expected_output.attrs @@ -119,9 +129,7 @@ def test_scale_space_dimension(dims: list[str], data_shape): """ factor = [0.5, 2] numerical_data = np.arange(np.prod(data_shape)).reshape(data_shape) - data = xr.DataArray( - numerical_data, dims=dims, coords=DEFAULT_SPATIAL_COORDS - ) + data = xr.DataArray(numerical_data, dims=dims, coords=SPATIAL_COORDS_2D) scaled_data = scale(data, factor=factor) broadcast_list = [1 if dim != "space" else len(factor) for dim in dims] expected_output_data = data * np.array(factor).reshape(broadcast_list) @@ -158,7 +166,7 @@ def test_scale_space_dimension(dims: list[str], data_shape): ], ) def test_scale_twice( - sample_data: xr.DataArray, + sample_data_2d: xr.DataArray, optional_arguments_1: dict[str, Any], optional_arguments_2: dict[str, Any], expected_output: xr.DataArray, @@ -168,7 +176,7 @@ def test_scale_twice( provided, or remove it if None is passed explicitly or by default. """ output_data_array = scale( - scale(sample_data, **optional_arguments_1), + scale(sample_data_2d, **optional_arguments_1), **optional_arguments_2, ) xr.testing.assert_equal(output_data_array, expected_output) @@ -193,11 +201,43 @@ def test_scale_twice( ], ) def test_scale_value_error( - sample_data: xr.DataArray, + sample_data_2d: xr.DataArray, invalid_factor: np.ndarray, expected_error_message: str, ): """Test invalid factors raise correct error type and message.""" with pytest.raises(ValueError) as error: - scale(sample_data, factor=invalid_factor) + scale(sample_data_2d, factor=invalid_factor) assert str(error.value) == expected_error_message + + +@pytest.mark.parametrize( + "factor", [2, [1, 2, 0.5]], ids=["uniform scaling", "multi-axis scaling"] +) +def test_scale_3d_space(factor, sample_data_3d: xr.DataArray): + """Test scaling a DataArray with 3D space.""" + scaled_data = scale(sample_data_3d, factor=factor) + expected_output = data_array_with_dims_and_coords( + nparray_0_to_23().reshape(8, 3) * np.array(factor).reshape(1, -1), + coords=SPATIAL_COORDS_3D, + ) + xr.testing.assert_equal(scaled_data, expected_output) + + +@pytest.mark.parametrize( + "factor", + [2, [1, 2, 0.5]], + ids=["uniform scaling", "multi-axis scaling"], +) +def test_scale_invalid_3d_space(factor): + """Test scaling data with invalid 3D space coordinates.""" + invalid_coords = {"space": ["x", "flubble", "y"]} # "z" is missing + invalid_sample_data_3d = data_array_with_dims_and_coords( + nparray_0_to_23().reshape(8, 3), + coords=invalid_coords, + ) + with pytest.raises(ValueError) as error: + scale(invalid_sample_data_3d, factor=factor) + assert str(error.value) == ( + "Input data must contain ['z'] in the 'space' coordinates.\n" + )