Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add from_array_props
Browse files Browse the repository at this point in the history
d-v-b committed Aug 29, 2024
1 parent 2826666 commit 4b06c9b
Showing 2 changed files with 194 additions and 4 deletions.
102 changes: 98 additions & 4 deletions src/pydantic_ome_ngff/v04/multiscale.py
Original file line number Diff line number Diff line change
@@ -359,14 +359,14 @@ def from_arrays(
chunks: tuple[int, ...]
| tuple[tuple[int, ...], ...]
| Literal["auto"] = "auto",
compressor: Codec = DEFAULT_COMPRESSOR,
compressor: Codec | Literal["auto"] = DEFAULT_COMPRESSOR,
fill_value: Any = 0,
order: Literal["C", "F", "auto"] = "auto",
) -> Self:
"""
Create a `Group` from a sequence of multiscale arrays and spatial metadata.
Create a `MultiscaleGroup` from a sequence of multiscale arrays and spatial metadata.
The arrays are used as templates for corresponding `ArraySpec` instances, which model the Zarr arrays that would be created if the `Group` was stored.
The arrays are used as templates for corresponding `ArraySpec` instances, which model the Zarr arrays that would be created if the `MultiscaleGroup` was stored.
Parameters
----------
@@ -395,7 +395,7 @@ def from_arrays(
If a sequence of sequences of ints is provided, then this defines the chunks for each array.
fill_value: Any, default = 0
The fill value for the Zarr arrays.
compressor: `Codec`
compressor: `Codec` | "auto", default = `numcodecs.ZStd`
The compressor to use for the arrays. Default is `numcodecs.ZStd`.
order: "auto" | "C" | "F"
The memory layout used for chunks of Zarr arrays. The default is "auto", which will infer the order from the input arrays, and fall back to "C" if that inference fails.
@@ -436,6 +436,100 @@ def from_arrays(
attributes=MultiscaleGroupAttrs(multiscales=(multimeta,)),
)

@classmethod
def from_array_props(
    cls,
    dtype: np.dtype[Any],
    shapes: Sequence[Sequence[int]],
    paths: Sequence[str],
    axes: Sequence[Axis],
    scales: Sequence[tuple[int | float, ...]],
    translations: Sequence[tuple[int | float, ...]],
    name: str | None = None,
    type: str | None = None,
    metadata: dict[str, Any] | None = None,
    chunks: tuple[int, ...]
    | tuple[tuple[int, ...], ...]
    | Literal["auto"] = "auto",
    compressor: Codec = DEFAULT_COMPRESSOR,
    fill_value: Any = 0,
    order: Literal["C", "F", "auto"] = "auto",
) -> Self:
    """
    Create a `MultiscaleGroup` from a dtype and a sequence of shapes.

    The dtype and shapes are used to parametrize `ArraySpec` instances which model the
    Zarr arrays that would be created if the `MultiscaleGroup` was stored.

    Parameters
    ----------
    dtype: np.dtype[Any]
        The data type of the arrays. The same dtype is used for every array.
    shapes: Sequence[Sequence[int]]
        The shapes of the arrays.
    paths: Sequence[str]
        The paths to the arrays. Leading "/" characters are stripped before each path
        is used as a key relative to the group.
    axes: Sequence[Axis]
        `Axis` objects describing the dimensions of the arrays.
    scales: Sequence[Sequence[int | float]]
        A scale value for each axis of the array, for each shape in `shapes`.
    translations: Sequence[Sequence[int | float]]
        A translation value for each axis of the array, for each shape in `shapes`.
    name: str | None, default = None
        A name for the multiscale collection. Optional.
    type: str | None, default = None
        A description of the type of multiscale image represented by this group. Optional.
    metadata: Dict[str, Any] | None, default = None
        Arbitrary metadata associated with this multiscale collection. Optional.
    chunks: tuple[int] | tuple[tuple[int, ...]] | Literal["auto"], default = "auto"
        The chunks for the arrays in this multiscale group.
        If the string "auto" is provided, each array will have chunks set to the zarr-python default value, which depends on the shape and dtype of the array.
        If a single sequence of ints is provided, then this defines the chunks for all arrays.
        If a sequence of sequences of ints is provided, then this defines the chunks for each array.
    fill_value: Any, default = 0
        The fill value for the Zarr arrays.
    compressor: `Codec`
        The compressor to use for the arrays. Default is `numcodecs.ZStd`.
    order: "C" | "F" | "auto", default = "auto"
        The memory layout used for chunks of Zarr arrays. Because this constructor has
        no input arrays to infer a layout from, "auto" is resolved to "C".

    Returns
    -------
    Self
        An instance of this class with one `ArraySpec` member per entry in `paths` and
        a single `MultiscaleMetadata` entry in its `multiscales` attribute.

    Notes
    -----
    `paths`, `shapes`, `scales` and `translations` are combined with `zip`, so any
    surplus trailing entries in a longer sequence are ignored rather than reported here.
    """
    # There is nothing to infer a layout from, so "auto" falls back to the documented
    # default instead of being forwarded verbatim to ArraySpec.
    order_resolved: Literal["C", "F"] = "C" if order == "auto" else order

    shapes_normalized = tuple(tuple(s) for s in shapes)
    chunks_normalized = normalize_chunks(
        chunks,
        shapes=shapes_normalized,
        # every array shares one dtype, hence one item size per shape
        typesizes=(dtype.itemsize,) * len(shapes_normalized),
    )

    members_flat = {
        "/" + key.lstrip("/"): ArraySpec(
            dtype=dtype,
            shape=shape,
            chunks=cnks,
            attributes={},
            compressor=compressor,
            filters=None,
            fill_value=fill_value,
            order=order_resolved,
        )
        for key, shape, cnks in zip(paths, shapes_normalized, chunks_normalized)
    }

    multimeta = MultiscaleMetadata(
        name=name,
        type=type,
        metadata=metadata,
        axes=tuple(axes),
        datasets=tuple(
            create_dataset(path=path, scale=scale, translation=translation)
            for path, scale, translation in zip(paths, scales, translations)
        ),
        coordinateTransformations=None,
    )
    return cls(
        members=GroupSpec.from_flat(members_flat).members,
        attributes=MultiscaleGroupAttrs(multiscales=(multimeta,)),
    )

@model_validator(mode="after")
def check_arrays_exist(self) -> MultiscaleGroup:
"""
96 changes: 96 additions & 0 deletions tests/v04/test_multiscales.py
Original file line number Diff line number Diff line change
@@ -432,6 +432,102 @@ def test_from_arrays(
)


@pytest.mark.parametrize("name", [None, "foo"])
@pytest.mark.parametrize("type", [None, "foo"])
@pytest.mark.parametrize("path_pattern", ["{0}", "s{0}", "foo/{0}"])
@pytest.mark.parametrize("metadata", [None, {"foo": 10}])
@pytest.mark.parametrize("ndim", [2, 3, 4, 5])
@pytest.mark.parametrize("chunks", ["auto", "tuple", "tuple-of-tuple"])
@pytest.mark.parametrize("order", ["C", "F"])
def test_from_array_props(
    name: str | None,
    type: str | None,
    path_pattern: str,
    metadata: dict[str, int] | None,
    ndim: int,
    chunks: Literal["auto", "tuple", "tuple-of-tuple"],
    order: Literal["auto", "C", "F"],
) -> None:
    """
    Check that `MultiscaleGroup.from_array_props` builds the expected group.

    Template numpy arrays are created only to supply a dtype and a set of shapes; the
    resulting group's multiscale metadata and per-array `ArraySpec` properties are then
    compared against the inputs.
    """
    # Three isotropic template arrays with side lengths 3, 2 and 1.
    arrays = tuple(np.arange(x**ndim).reshape((x,) * ndim) for x in [3, 2, 1])
    paths = tuple(path_pattern.format(idx) for idx in range(len(arrays)))
    scales = tuple((2**idx,) * ndim for idx in range(len(arrays)))
    translations = tuple(
        (t,) * ndim
        for t in accumulate(
            [(2 ** (idx - 1)) for idx in range(len(arrays))], operator.add
        )
    )

    all_axes = tuple(
        [
            Axis(
                name="x",
                type="space",
            ),
            Axis(name="y", type="space"),
            Axis(name="z", type="space"),
            Axis(name="t", type="time"),
            Axis(name="c", type="barf"),
        ]
    )
    # spatial axes have to come last
    if ndim in (2, 3):
        axes = all_axes[:ndim]
    else:
        # Put the last (ndim - 3) non-spatial axes in front of x, y, z so that the
        # number of axes always equals ndim: (c, x, y, z) for 4D, (t, c, x, y, z) for 5D.
        non_spatial = all_axes[3:]
        axes = (*non_spatial[-(ndim - 3) :], *all_axes[:3])

    chunks_arg: tuple[tuple[int, ...], ...] | tuple[int, ...] | Literal["auto"]
    if chunks == "auto":
        chunks_arg = chunks
        # the expectation encoded here is that every array gets the chunks guessed
        # for the first (largest) array
        chunks_expected = (
            guess_chunks(arrays[0].shape, arrays[0].dtype.itemsize),
        ) * len(arrays)
    elif chunks == "tuple":
        chunks_arg = (2,) * ndim
        chunks_expected = (chunks_arg,) * len(arrays)
    elif chunks == "tuple-of-tuple":
        chunks_arg = tuple((idx,) * ndim for idx in range(1, len(arrays) + 1))
        chunks_expected = chunks_arg
    else:
        # guards against a new parametrization leaving the names above unbound
        raise ValueError(f"Unexpected chunks parameter: {chunks!r}")

    if order == "auto":
        order_expected = "C"
    else:
        order_expected = order

    group = MultiscaleGroup.from_array_props(
        dtype=arrays[0].dtype,
        shapes=tuple(a.shape for a in arrays),
        paths=paths,
        axes=axes,
        scales=scales,
        translations=translations,
        name=name,
        type=type,
        metadata=metadata,
        chunks=chunks_arg,
        order=order,
    )

    group_flat = group.to_flat()

    assert group.attributes.multiscales[0].name == name
    assert group.attributes.multiscales[0].type == type
    assert group.attributes.multiscales[0].metadata == metadata
    assert group.attributes.multiscales[0].coordinateTransformations is None
    assert group.attributes.multiscales[0].axes == tuple(axes)
    for idx, array in enumerate(arrays):
        array_model: ArraySpec = group_flat["/" + paths[idx]]
        assert array_model.order == order_expected
        assert array.shape == array_model.shape
        assert array.dtype == array_model.dtype
        assert chunks_expected[idx] == array_model.chunks
        assert group.attributes.multiscales[0].datasets[
            idx
        ].coordinateTransformations == (
            VectorScale(scale=scales[idx]),
            VectorTranslation(translation=translations[idx]),
        )


@pytest.mark.parametrize(
"store_type", ["memory_store", "fsstore_local", "nested_directory_store"]
)

0 comments on commit 4b06c9b

Please sign in to comment.