Skip to content

Commit

Permalink
feat: new, safe, documented PrecomputedInfoSpec
Browse files Browse the repository at this point in the history
  • Loading branch information
supersergiy committed Jan 31, 2025
1 parent 31939fa commit b61cacc
Show file tree
Hide file tree
Showing 13 changed files with 1,159 additions and 447 deletions.
196 changes: 151 additions & 45 deletions tests/unit/layer/volumetric/cloudvol/test_backend.py

Large diffs are not rendered by default.

394 changes: 155 additions & 239 deletions tests/unit/layer/volumetric/precomputed/test_precomputed.py

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions zetta_utils/layer/deprecated/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from . import precomputed
File renamed without changes.
412 changes: 412 additions & 0 deletions zetta_utils/layer/precomputed.py

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions zetta_utils/layer/volumetric/cloudvol/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@
from zetta_utils.common import abspath, is_local
from zetta_utils.geometry import Vec3D

from ...precomputed import InfoExistsModes, PrecomputedInfoSpec, get_info
from .. import VolumetricBackend, VolumetricIndex
from ..precomputed import InfoExistsModes, PrecomputedInfoSpec, get_info

_cv_cache: cachetools.LRUCache = cachetools.LRUCache(maxsize=16)
_cv_cached: Dict[str, set] = {}

IN_MEM_CACHE_NUM_BYTES_PER_CV = 128 * 1024**2
IN_MEM_CACHE_NUM_BYTES_PER_CV = 128 * 1024 ** 2

# To avoid reloading info file - note that an empty provenance is passed
# since otherwise the CloudVolume's __new__ will download the provenance
Expand Down
134 changes: 57 additions & 77 deletions zetta_utils/layer/volumetric/cloudvol/build.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# pylint: disable=missing-docstring
from __future__ import annotations

from typing import Any, Iterable, Literal, Sequence, Union
from typing import Iterable, Literal, Sequence, Union

import torch
from numpy import typing as npt
Expand All @@ -10,15 +10,20 @@
from zetta_utils.tensor_ops import InterpolationMode

from ... import DataProcessor, IndexProcessor, JointIndexDataProcessor
from ...precomputed import (
InfoExistsModes,
InfoSpecParams,
PrecomputedInfoSpec,
PrecomputedVolumeDType,
)
from .. import VolumetricIndex, VolumetricLayer, build_volumetric_layer
from ..precomputed import InfoExistsModes, PrecomputedInfoSpec
from . import CVBackend

# from typeguard import typechecked


# @typechecked # TypeError: isinstance() arg 2 must be a type or tuple of types on p3.9
@builder.register("build_cv_layer")
@builder.register("build_cv_layer", versions=">=0.4")
def build_cv_layer( # pylint: disable=too-many-locals
path: str,
cv_kwargs: dict | None = None,
Expand All @@ -27,25 +32,16 @@ def build_cv_layer( # pylint: disable=too-many-locals
data_resolution: Sequence[float] | None = None,
interpolation_mode: InterpolationMode | None = None,
readonly: bool = False,
info_extend_if_exists: bool = True,
info_reference_path: str | None = None,
info_type: Literal["image", "segmentation"] | None = None,
info_data_type: str | None = None,
info_data_type: PrecomputedVolumeDType | None = None,
info_num_channels: int | None = None,
info_field_overrides: dict[str, Any] | None = None,
info_chunk_size: Sequence[int] | None = None,
info_chunk_size_map: dict[str, Sequence[int]] | None = None,
info_dataset_size: Sequence[int] | None = None,
info_dataset_size_map: dict[str, Sequence[int]] | None = None,
info_voxel_offset: Sequence[int] | None = None,
info_voxel_offset_map: dict[str, Sequence[int]] | None = None,
info_encoding: str | None = None,
info_encoding_map: dict[str, str] | None = None,
info_add_scales: Sequence[Sequence[float] | dict[str, Any]] | None = None,
info_add_scales_ref: str | dict[str, Any] | None = None,
info_add_scales_exclude_fields: Sequence[str] = (),
info_add_scales_mode: Literal["merge", "replace"] = "merge",
info_only_retain_scales: Sequence[Sequence[float]] | None = None,
info_scales: Sequence[Sequence[float]] | None = None,
inherit_all_params: bool = False,
on_info_exists: InfoExistsModes = "extend",
allow_slice_rounding: bool = False,
index_procs: Iterable[IndexProcessor[VolumetricIndex]] = (),
Expand All @@ -66,85 +62,69 @@ def build_cv_layer( # pylint: disable=too-many-locals
:param path: Path to the CloudVolume.
:param cv_kwargs: Keyword arguments passed to the CloudVolume constructor.
:param default_desired_resolution: Default resolution used when the desired resolution
is not given as a part of an index.
:param default_desired_resolution: Default resolution used when the desired
resolution is not given as a part of an index.
:param index_resolution: Resolution at which slices of the index will be given.
:param data_resolution: Resolution at which data will be read from the CloudVolume backend.
When ``data_resolution`` differs from ``desired_resolution``, data will be interpolated
from ``data_resolution`` to ``desired_resolution`` using the given ``interpolation_mode``.
:param data_resolution: Resolution at which data will be read from the CloudVolume
backend. When ``data_resolution`` differs from ``desired_resolution``, data
will be interpolated from ``data_resolution`` to ``desired_resolution`` using
the given ``interpolation_mode``.
:param interpolation_mode: Specification of the interpolation mode to use when
``data_resolution`` differs from ``desired_resolution``.
:param readonly: Whether layer is read only.
:param info_reference_path: Path to a reference CloudVolume for info.
:param info_type: Type of the volume. Takes precedence over ``info_fields_overrides["type"]``.
:param info_data_type: Data type of the volume. Takes precedence over
``info_fields_overrides["data_type"]``.
:param info_num_channels: Number of channels of the volume. Takes precedence over
``info_fields_overrides["num_channels"]``.
:param info_field_overrides: Manual info field specifications.
:param info_chunk_size: Precomputed chunk size for all scales.
:param info_chunk_size_map: Precomputed chunk size for each resolution.
:param info_dataset_size: Precomputed dataset size for all scales.
:param info_dataset_size_map: Precomputed dataset size for each resolution.
:param info_voxel_offset: Precomputed voxel offset for all scales.
:param info_voxel_offset_map: Precomputed voxel offset for each resolution.
:param info_encoding: Precomputed encoding for all scales.
:param info_encoding_map: Precomputed encoding for each resolution.
:param info_add_scales: List of scales to be added based on ``info_add_scales_ref``
Each entry can be either a resolution (e.g., [4, 4, 40]) or a partially filled
Precomputed scale. By default, ``size`` and ``voxel_offset`` will be scaled
accordingly to the reference scale, while keeping ``chunk_sizes`` the same.
Note that using ``info_[chunk_size,dataset_size,voxel_offset][_map]`` will
override these values. Using this will also sort the added and existing scales
by their resolutions.
:param info_add_scales_ref: Reference scale to be used. If `None`, use
the highest available resolution scale.
:param info_add_scales_mode: Either "merge" or "replace". "merge" will
merge added scales to existing scales if ``info_reference_path`` is
used, while "replace" will not keep them.
:param info_only_retain_scales: Only keep the given scales. Evaluated after all
other info operations except for the actual writing.
:param on_info_exists: Behavior mode for when both new info specs aregiven
and layer info already exists.
:param allow_slice_rounding: Whether layer allows IO operations where the specified index
corresponds to a non-integer number of pixels at the desired resolution. When
``allow_slice_rounding == True``, shapes will be rounded to nearest integer.
:param index_procs: List of processors that will be applied to the index given by the user
prior to IO operations.
:param info_scales: List of scales to be added to the info file.
:param info_type: Type of the volume (`image` or `segmentation`).
:param info_data_type: Data type of the volume.
:param info_num_channels: Number of channels of the volume.
:param info_chunk_size: Precomputed chunk size for all new scales.
:param info_dataset_size: Precomputed dataset size for all new scales.
:param info_voxel_offset: Precomputed voxel offset for all new scales.
:param info_bounds_resolution: Resolution used to specify dataset size and voxel
offset.
:param info_encoding: Precomputed encoding for all new scales.
:param inherit_all_params: Whether to inherit all unspecified parameters from the
reference info file. If False, only the dataset bounds will be inherited.
:param on_info_exists: Behavior mode for when both new info specs are given and
layer info already exists.
:param allow_slice_rounding: Whether layer allows IO operations where the specified
index corresponds to a non-integer number of pixels at the desired resolution.
When ``allow_slice_rounding == True``, shapes will be rounded to nearest integer.
:param index_procs: List of processors that will be applied to the index given by
the user prior to IO operations.
:param read_procs: List of processors that will be applied to the read data before
returning it to the user.
:param write_procs: List of processors that will be applied to the data given by
the user before writing it to the backend.
:return: Layer built according to the spec.
"""
if cv_kwargs is None:
cv_kwargs = {}

if info_scales is not None:
info_spec = PrecomputedInfoSpec(
info_spec_params=InfoSpecParams.from_optional_reference(
reference_path=info_reference_path,
scales=info_scales,
type=info_type,
data_type=info_data_type,
chunk_size=info_chunk_size,
num_channels=info_num_channels,
encoding=info_encoding,
voxel_offset=info_voxel_offset,
size=info_dataset_size,
bounds_resolution=default_desired_resolution,
inherit_all_params=inherit_all_params,
)
)
else:
info_spec = PrecomputedInfoSpec(info_path=path)

backend = CVBackend(
path=path,
cv_kwargs=cv_kwargs,
on_info_exists=on_info_exists,
info_spec=PrecomputedInfoSpec(
type=info_type,
data_type=info_data_type,
num_channels=info_num_channels,
reference_path=info_reference_path,
extend_if_exists_path=path if info_extend_if_exists else None,
field_overrides=info_field_overrides,
default_chunk_size=info_chunk_size,
chunk_size_map=info_chunk_size_map,
default_dataset_size=info_dataset_size,
dataset_size_map=info_dataset_size_map,
default_voxel_offset=info_voxel_offset,
voxel_offset_map=info_voxel_offset_map,
default_encoding=info_encoding,
encoding_map=info_encoding_map,
add_scales=info_add_scales,
add_scales_ref=info_add_scales_ref,
add_scales_mode=info_add_scales_mode,
add_scales_exclude_fields=info_add_scales_exclude_fields,
only_retain_scales=info_only_retain_scales,
),
info_spec=info_spec,
)

result = build_volumetric_layer(
Expand Down
162 changes: 162 additions & 0 deletions zetta_utils/layer/volumetric/cloudvol/deprecated.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
# pylint: disable=missing-docstring
from __future__ import annotations

from typing import Any, Iterable, Literal, Sequence, Union

import torch
from numpy import typing as npt

from zetta_utils import builder
from zetta_utils.tensor_ops import InterpolationMode

from ... import DataProcessor, IndexProcessor, JointIndexDataProcessor
from ...deprecated.precomputed import InfoExistsModes, PrecomputedInfoSpec
from .. import VolumetricIndex, VolumetricLayer, build_volumetric_layer
from . import CVBackend

# from typeguard import typechecked


# @typechecked # TypeError: isinstance() arg 2 must be a type or tuple of types on p3.9
@builder.register("build_cv_layer", versions="<=0.3")
def build_cv_layer(  # pylint: disable=too-many-locals
    path: str,
    cv_kwargs: dict | None = None,
    default_desired_resolution: Sequence[float] | None = None,
    index_resolution: Sequence[float] | None = None,
    data_resolution: Sequence[float] | None = None,
    interpolation_mode: InterpolationMode | None = None,
    readonly: bool = False,
    info_extend_if_exists: bool = True,
    info_reference_path: str | None = None,
    info_type: Literal["image", "segmentation"] | None = None,
    info_data_type: str | None = None,
    info_num_channels: int | None = None,
    info_field_overrides: dict[str, Any] | None = None,
    info_chunk_size: Sequence[int] | None = None,
    info_chunk_size_map: dict[str, Sequence[int]] | None = None,
    info_dataset_size: Sequence[int] | None = None,
    info_dataset_size_map: dict[str, Sequence[int]] | None = None,
    info_voxel_offset: Sequence[int] | None = None,
    info_voxel_offset_map: dict[str, Sequence[int]] | None = None,
    info_encoding: str | None = None,
    info_encoding_map: dict[str, str] | None = None,
    info_add_scales: Sequence[Sequence[float] | dict[str, Any]] | None = None,
    info_add_scales_ref: str | dict[str, Any] | None = None,
    info_add_scales_exclude_fields: Sequence[str] = (),
    info_add_scales_mode: Literal["merge", "replace"] = "merge",
    info_only_retain_scales: Sequence[Sequence[float]] | None = None,
    on_info_exists: InfoExistsModes = "extend",
    allow_slice_rounding: bool = False,
    index_procs: Iterable[IndexProcessor[VolumetricIndex]] = (),
    read_procs: Iterable[
        Union[
            DataProcessor[npt.NDArray],
            JointIndexDataProcessor[npt.NDArray, VolumetricIndex],
        ]
    ] = (),
    write_procs: Iterable[
        Union[
            DataProcessor[npt.NDArray | torch.Tensor],
            JointIndexDataProcessor[npt.NDArray | torch.Tensor, VolumetricIndex],
        ]
    ] = (),
) -> VolumetricLayer:  # pragma: no cover # trivial conditional, delegation only
    """Build a CloudVolume layer using the deprecated info specification.

    This variant is registered under ``build_cv_layer`` for spec versions ``<=0.3``
    and assembles its info spec from the deprecated ``PrecomputedInfoSpec``.

    :param path: Path to the CloudVolume.
    :param cv_kwargs: Keyword arguments passed to the CloudVolume constructor.
        ``None`` is treated as an empty dict.
    :param default_desired_resolution: Default resolution used when the desired
        resolution is not given as a part of an index.
    :param index_resolution: Resolution at which slices of the index will be given.
    :param data_resolution: Resolution at which data will be read from the
        CloudVolume backend. When ``data_resolution`` differs from
        ``desired_resolution``, data will be interpolated from ``data_resolution``
        to ``desired_resolution`` using the given ``interpolation_mode``.
    :param interpolation_mode: Specification of the interpolation mode to use when
        ``data_resolution`` differs from ``desired_resolution``.
    :param readonly: Whether layer is read only.
    :param info_extend_if_exists: When true, ``path`` is handed to the info spec as
        ``extend_if_exists_path``; otherwise ``None`` is handed over instead.
    :param info_reference_path: Path to a reference CloudVolume for info.
    :param info_type: Type of the volume. Takes precedence over
        ``info_field_overrides["type"]``.
    :param info_data_type: Data type of the volume. Takes precedence over
        ``info_field_overrides["data_type"]``.
    :param info_num_channels: Number of channels of the volume. Takes precedence
        over ``info_field_overrides["num_channels"]``.
    :param info_field_overrides: Manual info field specifications.
    :param info_chunk_size: Precomputed chunk size for all scales.
    :param info_chunk_size_map: Precomputed chunk size for each resolution.
    :param info_dataset_size: Precomputed dataset size for all scales.
    :param info_dataset_size_map: Precomputed dataset size for each resolution.
    :param info_voxel_offset: Precomputed voxel offset for all scales.
    :param info_voxel_offset_map: Precomputed voxel offset for each resolution.
    :param info_encoding: Precomputed encoding for all scales.
    :param info_encoding_map: Precomputed encoding for each resolution.
    :param info_add_scales: List of scales to be added based on
        ``info_add_scales_ref``. Each entry can be either a resolution
        (e.g., [4, 4, 40]) or a partially filled Precomputed scale. By default,
        ``size`` and ``voxel_offset`` will be scaled accordingly to the reference
        scale, while keeping ``chunk_sizes`` the same. Note that using
        ``info_[chunk_size,dataset_size,voxel_offset][_map]`` will override these
        values. Using this will also sort the added and existing scales by their
        resolutions.
    :param info_add_scales_ref: Reference scale to be used. If `None`, use the
        highest available resolution scale.
    :param info_add_scales_exclude_fields: Forwarded to the info spec as
        ``add_scales_exclude_fields``.
    :param info_add_scales_mode: Either "merge" or "replace". "merge" will merge
        added scales to existing scales if ``info_reference_path`` is used, while
        "replace" will not keep them.
    :param info_only_retain_scales: Only keep the given scales. Evaluated after all
        other info operations except for the actual writing.
    :param on_info_exists: Behavior mode for when both new info specs are given and
        layer info already exists.
    :param allow_slice_rounding: Whether layer allows IO operations where the
        specified index corresponds to a non-integer number of pixels at the desired
        resolution. When ``allow_slice_rounding == True``, shapes will be rounded to
        nearest integer.
    :param index_procs: List of processors that will be applied to the index given
        by the user prior to IO operations.
    :param read_procs: List of processors that will be applied to the read data
        before returning it to the user.
    :param write_procs: List of processors that will be applied to the data given by
        the user before writing it to the backend.
    :return: Layer built according to the spec.
    """
    # Collect every ``info_*`` argument into the deprecated info specification.
    info_spec = PrecomputedInfoSpec(
        type=info_type,
        data_type=info_data_type,
        num_channels=info_num_channels,
        reference_path=info_reference_path,
        extend_if_exists_path=path if info_extend_if_exists else None,
        field_overrides=info_field_overrides,
        default_chunk_size=info_chunk_size,
        chunk_size_map=info_chunk_size_map,
        default_dataset_size=info_dataset_size,
        dataset_size_map=info_dataset_size_map,
        default_voxel_offset=info_voxel_offset,
        voxel_offset_map=info_voxel_offset_map,
        default_encoding=info_encoding,
        encoding_map=info_encoding_map,
        add_scales=info_add_scales,
        add_scales_ref=info_add_scales_ref,
        add_scales_mode=info_add_scales_mode,
        add_scales_exclude_fields=info_add_scales_exclude_fields,
        only_retain_scales=info_only_retain_scales,
    )

    backend = CVBackend(
        path=path,
        cv_kwargs={} if cv_kwargs is None else cv_kwargs,
        on_info_exists=on_info_exists,
        info_spec=info_spec,  # type: ignore # deprecated and new are compatible
    )

    # Everything that is not backend-specific is delegated to the generic builder.
    return build_volumetric_layer(
        backend=backend,
        default_desired_resolution=default_desired_resolution,
        index_resolution=index_resolution,
        data_resolution=data_resolution,
        interpolation_mode=interpolation_mode,
        readonly=readonly,
        allow_slice_rounding=allow_slice_rounding,
        index_procs=index_procs,
        read_procs=read_procs,
        write_procs=write_procs,
    )
6 changes: 0 additions & 6 deletions zetta_utils/layer/volumetric/precomputed/__init__.py

This file was deleted.

1 change: 1 addition & 0 deletions zetta_utils/layer/volumetric/tensorstore/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
from .backend import TSBackend
from .build import build_ts_layer
from . import deprecated
Loading

0 comments on commit b61cacc

Please sign in to comment.