add from_array_props

JaneliaSciComp · Aug 29, 2024 · 4b06c9b · 4b06c9b
1 parent 2826666
commit 4b06c9b
Showing 2 changed files with 194 additions and 4 deletions.
diff --git a/src/pydantic_ome_ngff/v04/multiscale.py b/src/pydantic_ome_ngff/v04/multiscale.py
@@ -359,14 +359,14 @@ def from_arrays(
         chunks: tuple[int, ...]
         | tuple[tuple[int, ...], ...]
         | Literal["auto"] = "auto",
-        compressor: Codec = DEFAULT_COMPRESSOR,
+        compressor: Codec | Literal["auto"] = DEFAULT_COMPRESSOR,
         fill_value: Any = 0,
         order: Literal["C", "F", "auto"] = "auto",
     ) -> Self:
         """
-        Create a `Group` from a sequence of multiscale arrays and spatial metadata.
+        Create a `MultiscaleGroup` from a sequence of multiscale arrays and spatial metadata.
 
-        The arrays are used as templates for corresponding `ArraySpec` instances, which model the Zarr arrays that would be created if the `Group` was stored.
+        The arrays are used as templates for corresponding `ArraySpec` instances, which model the Zarr arrays that would be created if the `MultiscaleGroup` was stored.
 
         Parameters
         ----------
@@ -395,7 +395,7 @@ def from_arrays(
             If a sequence of sequences of ints is provided, then this defines the chunks for each array.
         fill_value: Any, default = 0
             The fill value for the Zarr arrays.
-        compressor: `Codec`
+        compressor: `Codec` | "auto", default = `numcodecs.ZStd`
             The compressor to use for the arrays. Default is `numcodecs.ZStd`.
         order: "auto" | "C" | "F"
             The memory layout used for chunks of Zarr arrays. The default is "auto", which will infer the order from the input arrays, and fall back to "C" if that inference fails.
@@ -436,6 +436,100 @@ def from_arrays(
             attributes=MultiscaleGroupAttrs(multiscales=(multimeta,)),
         )
 
+    @classmethod
+    def from_array_props(
+        cls,
+        dtype: np.dtype[Any],
+        shapes: Sequence[Sequence[int]],
+        paths: Sequence[str],
+        axes: Sequence[Axis],
+        scales: Sequence[tuple[int | float, ...]],
+        translations: Sequence[tuple[int | float, ...]],
+        name: str | None = None,
+        type: str | None = None,
+        metadata: dict[str, Any] | None = None,
+        chunks: tuple[int, ...]
+        | tuple[tuple[int, ...], ...]
+        | Literal["auto"] = "auto",
+        compressor: Codec = DEFAULT_COMPRESSOR,
+        fill_value: Any = 0,
+        order: Literal["C", "F", "auto"] = "auto",
+    ) -> Self:
+        """
+        Create a `MultiscaleGroup` from a dtype and a sequence of shapes.
+
+        The dtype and shapes are used to parametrize `ArraySpec` instances which model the Zarr arrays that would be created if the `MultiscaleGroup` was stored.
+
+        Parameters
+        ----------
+        dtype: np.dtype[Any]
+            The data type of the arrays.
+        shapes: Sequence[Sequence[int]]
+            The shapes of the arrays.
+        paths: Sequence[str]
+            The paths to the arrays.
+        axes: Sequence[Axis]
+            `Axis` objects describing the dimensions of the arrays.
+        scales: Sequence[Sequence[int | float]]
+            A scale value for each axis of the array, for each shape in `shapes`.
+        translations: Sequence[Sequence[int | float]]
+            A translation value for each axis of the array, for each shape in `shapes`.
+        name: str | None, default = None
+            A name for the multiscale collection. Optional.
+        type: str | None, default = None
+            A description of the type of multiscale image represented by this group. Optional.
+        metadata: Dict[str, Any] | None, default = None
+            Arbitrary metadata associated with this multiscale collection. Optional.
+        chunks: tuple[int] | tuple[tuple[int, ...]] | Literal["auto"], default = "auto"
+            The chunks for the arrays in this multiscale group.
+            If the string "auto" is provided, each array will have chunks set to the zarr-python default value, which depends on the shape and dtype of the array.
+            If a single sequence of ints is provided, then this defines the chunks for all arrays.
+            If a sequence of sequences of ints is provided, then this defines the chunks for each array.
+        fill_value: Any, default = 0
+            The fill value for the Zarr arrays.
+        compressor: `Codec`
+            The compressor to use for the arrays. Default is `numcodecs.ZStd`.
+        order: "C" | "F" | "auto", default = "auto"
+            The memory layout used for chunks of Zarr arrays. Because there is no input array to infer a layout from, "auto" is treated as "C".
+
+        Returns
+        -------
+        Self
+            A group whose members are one `ArraySpec` per entry in `paths`, and whose attributes hold a single multiscale metadata entry with one dataset per path.
+
+        Raises
+        ------
+        ValueError
+            If `shapes`, `paths`, `scales`, and `translations` do not all have the same length.
+        """
+
+        # zip() stops at the shortest input, so mismatched lengths would otherwise
+        # silently drop arrays or datasets instead of reporting the caller's mistake.
+        if not (len(shapes) == len(paths) == len(scales) == len(translations)):
+            raise ValueError(
+                "shapes, paths, scales, and translations must have the same length. "
+                f"Got lengths {len(shapes)}, {len(paths)}, {len(scales)}, and {len(translations)}."
+            )
+
+        # Only shapes are given here, so there is nothing to infer a layout from; "auto" falls back to "C".
+        order_resolved = "C" if order == "auto" else order
+
+        chunks_normalized = normalize_chunks(
+            chunks,
+            shapes=tuple(tuple(s) for s in shapes),
+            # every array shares the same dtype, hence the same item size
+            typesizes=(dtype.itemsize,) * len(shapes),
+        )
+
+        # Keys are normalized to start with exactly one leading "/", the form consumed by `GroupSpec.from_flat`.
+        members_flat = {
+            "/" + key.lstrip("/"): ArraySpec(
+                dtype=dtype,
+                shape=shape,
+                chunks=cnks,
+                attributes={},
+                compressor=compressor,
+                filters=None,
+                fill_value=fill_value,
+                order=order_resolved,
+            )
+            for key, shape, cnks in zip(paths, shapes, chunks_normalized)
+        }
+
+        multimeta = MultiscaleMetadata(
+            name=name,
+            type=type,
+            metadata=metadata,
+            axes=tuple(axes),
+            datasets=tuple(
+                create_dataset(path=path, scale=scale, translation=translation)
+                for path, scale, translation in zip(paths, scales, translations)
+            ),
+            coordinateTransformations=None,
+        )
+        return cls(
+            members=GroupSpec.from_flat(members_flat).members,
+            attributes=MultiscaleGroupAttrs(multiscales=(multimeta,)),
+        )
+
     @model_validator(mode="after")
     def check_arrays_exist(self) -> MultiscaleGroup:
         """

diff --git a/tests/v04/test_multiscales.py b/tests/v04/test_multiscales.py
@@ -432,6 +432,102 @@ def test_from_arrays(
         )
 
 
+@pytest.mark.parametrize("name", [None, "foo"])
+@pytest.mark.parametrize("type", [None, "foo"])
+@pytest.mark.parametrize("path_pattern", ["{0}", "s{0}", "foo/{0}"])
+@pytest.mark.parametrize("metadata", [None, {"foo": 10}])
+@pytest.mark.parametrize("ndim", [2, 3, 4, 5])
+@pytest.mark.parametrize("chunks", ["auto", "tuple", "tuple-of-tuple"])
+@pytest.mark.parametrize("order", ["C", "F"])
+def test_from_array_props(
+    name: str | None,
+    type: str | None,
+    path_pattern: str,
+    metadata: dict[str, int] | None,
+    ndim: int,
+    chunks: Literal["auto", "tuple", "tuple-of-tuple"],
+    order: Literal["auto", "C", "F"],
+) -> None:
+    """
+    Check that `MultiscaleGroup.from_array_props` builds array specs and multiscale
+    metadata that match the requested dtype, shapes, paths, chunks, order, and
+    coordinate transformations.
+    """
+    # These arrays are never stored; they only supply the dtype and shapes passed to
+    # `from_array_props` and serve as the reference for the assertions below.
+    arrays = tuple(np.arange(x**ndim).reshape((x,) * ndim) for x in [3, 2, 1])
+    paths = tuple(path_pattern.format(idx) for idx in range(len(arrays)))
+    scales = tuple((2**idx,) * ndim for idx in range(len(arrays)))
+    translations = tuple(
+        (t,) * ndim
+        for t in accumulate(
+            [(2 ** (idx - 1)) for idx in range(len(arrays))], operator.add
+        )
+    )
+
+    all_axes = tuple(
+        [
+            Axis(
+                name="x",
+                type="space",
+            ),
+            Axis(name="y", type="space"),
+            Axis(name="z", type="space"),
+            Axis(name="t", type="time"),
+            Axis(name="c", type="barf"),
+        ]
+    )
+    # spatial axes have to come last
+    if ndim in (2, 3):
+        axes = all_axes[:ndim]
+    else:
+        # Prepend one non-spatial axis per extra dimension so that len(axes) == ndim:
+        # ndim == 4 gives (c, x, y, z) and ndim == 5 gives (t, c, x, y, z).
+        axes = tuple([*all_axes[3:][3 - ndim :], *all_axes[:3]])
+    chunks_arg: tuple[tuple[int, ...], ...] | tuple[int, ...] | Literal["auto"]
+    if chunks == "auto":
+        chunks_arg = chunks
+        # The expectation is that every array shares the chunks guessed for the first (largest) array.
+        chunks_expected = (
+            guess_chunks(arrays[0].shape, arrays[0].dtype.itemsize),
+        ) * len(arrays)
+    elif chunks == "tuple":
+        chunks_arg = (2,) * ndim
+        chunks_expected = (chunks_arg,) * len(arrays)
+    elif chunks == "tuple-of-tuple":
+        chunks_arg = tuple((idx,) * ndim for idx in range(1, len(arrays) + 1))
+        chunks_expected = chunks_arg
+
+    # NOTE: `order` is only parametrized over "C" and "F" above, so the "auto"
+    # branch is not exercised by the current parametrization.
+    if order == "auto":
+        order_expected = "C"
+    else:
+        order_expected = order
+
+    group = MultiscaleGroup.from_array_props(
+        dtype=arrays[0].dtype,
+        shapes=tuple(a.shape for a in arrays),
+        paths=paths,
+        axes=axes,
+        scales=scales,
+        translations=translations,
+        name=name,
+        type=type,
+        metadata=metadata,
+        chunks=chunks_arg,
+        order=order,
+    )
+
+    group_flat = group.to_flat()
+
+    assert group.attributes.multiscales[0].name == name
+    assert group.attributes.multiscales[0].type == type
+    assert group.attributes.multiscales[0].metadata == metadata
+    assert group.attributes.multiscales[0].coordinateTransformations is None
+    assert group.attributes.multiscales[0].axes == tuple(axes)
+    for idx, array in enumerate(arrays):
+        # flattened keys carry a leading "/" in front of the relative array path
+        array_model: ArraySpec = group_flat["/" + paths[idx]]
+        assert array_model.order == order_expected
+        assert array.shape == array_model.shape
+        assert array.dtype == array_model.dtype
+        assert chunks_expected[idx] == array_model.chunks
+        assert group.attributes.multiscales[0].datasets[
+            idx
+        ].coordinateTransformations == (
+            VectorScale(scale=scales[idx]),
+            VectorTranslation(translation=translations[idx]),
+        )
+
+
 @pytest.mark.parametrize(
     "store_type", ["memory_store", "fsstore_local", "nested_directory_store"]
 )