diff --git a/pyproject.toml b/pyproject.toml
index abc05ef55..7c46685f2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -95,6 +95,14 @@ mazepa-addons = [
     "google-api-python-client",
     "google-cloud-compute",
 ]
+meshing = [
+    "zetta_utils[cloudvol, datastore, mazepa]",
+    "mapbuffer >= 0.7.2",
+    "pyfqmr >= 0.2.0",
+    "shard-computer >= 1.1.1",
+    "trimesh >= 4.4.0",
+    "zmesh >= 1.7.1",
+]
 modules = [
     # put them in order of dependencies
     "zetta_utils[tensor_ops, viz, cli, gcs, slurm]",
@@ -102,7 +110,13 @@ modules = [
     "zetta_utils[training,mazepa]",
     "zetta_utils[mazepa_addons]",
     "zetta_utils[datastore]",
+    "zetta_utils[montaging]",
     "zetta_utils[segmentation]",
+    "zetta_utils[meshing]",
+]
+montaging = [
+    "zetta_utils[cloudvol, datastore, mazepa]",
+    "torch >= 2.0",
 ]
 public = [
     # put them in order of dependencies
diff --git a/specs/dodam/meshing_copy.cue b/specs/dodam/meshing_copy.cue
new file mode 100644
index 000000000..6306ea751
--- /dev/null
+++ b/specs/dodam/meshing_copy.cue
@@ -0,0 +1,66 @@
+//
+// Handy variables
+#SRC_PATH: "gs://dacey-human-retina-001-segmentation/240320-retina-finetune-embd24-v1-x1/20240408/seg_agg25"
+#DST_PATH: "gs://dodam_exp/seg_medcutoutlod"
+#BBOX: {
+    "@type": "BBox3D.from_coords"
+    start_coord: [1024 * 0, 1024 * 0, 1995]
+    end_coord: [1024 * 10, 1024 * 10, 1995 + 128]
+    resolution: [20, 20, 50]
+}
+
+// Execution parameters
+"@type": "mazepa.execute_on_gcp_with_sqs"
+worker_image: "us.gcr.io/zetta-research/zetta_utils:sergiy_all_p310_x214"
+worker_cluster_name: "zutils-x3"
+worker_cluster_region: "us-east1"
+worker_cluster_project: "zetta-research"
+worker_resources: {
+    memory: "18560Mi"
+    //"nvidia.com/gpu": "1"
+}
+worker_replicas: 10
+
+local_test: true // set to `false` to execute remotely
+
+target: {
+    // We're applying the subchunkable processing flow
+    "@type": "build_subchunkable_apply_flow"
+    bbox: #BBOX
+
+    // What resolution is our destination?
+    dst_resolution: [20, 20, 50]
+
+    // How do we chunk/crop/blend?
+    processing_chunk_sizes: [[2 * 1024, 2 * 1024, 64]]
+    processing_crop_pads: [[1, 0, 0]]
+    processing_blend_pads: [[0, 0, 0]]
+    skip_intermediaries: true
+
+    // We want to expand the input bbox to be evenly divisible
+    // by chunk size
+    expand_bbox_processing: true
+
+    // Specification for the operation we're performing
+    fn: {
+        "@type": "lambda"
+        lambda_str: "lambda src: src"
+    }
+    // Specification for the inputs to the operation
+    op_kwargs: {
+        src: {
+            "@type": "build_cv_layer"
+            path: #SRC_PATH
+        }
+    }
+
+    // Specification of the output layer. Subchunkable expects
+    // a single output layer. If multiple output layers are
+    // needed, refer to advanced examples.
+    dst: {
+        "@type": "build_cv_layer"
+        path: #DST_PATH
+        info_reference_path: #SRC_PATH
+        //on_info_exists: "overwrite"
+    }
+}
diff --git a/specs/dodam/meshing_frag.cue b/specs/dodam/meshing_frag.cue
new file mode 100644
index 000000000..51fc5ec9c
--- /dev/null
+++ b/specs/dodam/meshing_frag.cue
@@ -0,0 +1,85 @@
+//
+// Handy variables
+#SEG_PATH: "gs://dodam_exp/seg_medcutout"
+#MESH_DIR: "mesh_mip_1_err_40"
+#BBOX: {
+    "@type": "BBox3D.from_coords"
+    start_coord: [1024 * 0, 1024 * 0, 1995]
+    end_coord: [1024 * 10, 1024 * 10, 1995 + 128]
+    resolution: [20, 20, 50]
+}
+
+#SEG_DB_PATH: "dodam-med-seg-512-512-128v2"
+#FRAG_DB_PATH: "dodam-med-frag-512-512-128v2"
+#PROJECT: "zetta-research"
+
+
+// Execution parameters
+"@type": "mazepa.execute_on_gcp_with_sqs"
+worker_image: "us-east1-docker.pkg.dev/zetta-research/zutils/zetta_utils:dodam-meshing-14"
+worker_cluster_name: "zutils-x3"
+worker_cluster_region: "us-east1"
+worker_cluster_project: "zetta-research"
+worker_resources: {
+    memory: "18560Mi"
+    //"nvidia.com/gpu": "1"
+}
+worker_replicas: 100
+num_procs: 1
+semaphores_spec: {
+    "read": 8
+    "write": 8
+    "cuda": 0
+    "cpu": 8
+}
+local_test: true // set to `false` to execute remotely
+debug: true // set to `false` to execute remotely
+
+target: {
+    // We're applying the subchunkable processing flow
+    "@type": "build_subchunkable_apply_flow"
+    bbox: #BBOX
+
+    // What resolution is our destination?
+    dst_resolution: [20, 20, 50]
+
+    // How do we chunk/crop/blend?
+    processing_chunk_sizes: [[512, 512, 128]]
+    processing_crop_pads: [[0, 0, 0]]
+    processing_blend_pads: [[0, 0, 0]]
+    skip_intermediaries: true
+
+    // We want to expand the input bbox to be evenly divisible
+    // by chunk size
+    expand_bbox_processing: true
+
+    // Specification for the operation we're performing
+    op: {
+        "@type": "MakeMeshFragsOperation"
+    }
+    // Specification for the inputs to the operation
+    op_kwargs: {
+        segmentation: {
+            "@type": "build_cv_layer"
+            path: #SEG_PATH
+        },
+        seg_db: {
+            "@type": "build_datastore_layer"
+            namespace: #SEG_DB_PATH
+            project: #PROJECT
+        },
+        frag_db: {
+            "@type": "build_datastore_layer"
+            namespace: #FRAG_DB_PATH
+            project: #PROJECT
+        },
+        mesh_dir: #MESH_DIR,
+        num_splits: [2,2,1]
+// num_splits: [1,1,1]
+    }
+
+    // Specification of the output layer. Subchunkable expects
+    // a single output layer. If multiple output layers are
+    // needed, refer to advanced examples.
+    dst: null
+}
diff --git a/specs/dodam/meshing_shard.cue b/specs/dodam/meshing_shard.cue
new file mode 100644
index 000000000..2389ce54c
--- /dev/null
+++ b/specs/dodam/meshing_shard.cue
@@ -0,0 +1,55 @@
+//
+// Handy variables
+#SEG_PATH: "gs://dodam_exp/seg_medcutout"
+#MESH_DIR: "mesh_mip_1_err_40"
+#BBOX: {
+    "@type": "BBox3D.from_coords"
+    start_coord: [1024 * 0, 1024 * 0, 1995]
+    end_coord: [1024 * 10, 1024 * 10, 1995 + 128]
+    resolution: [20, 20, 50]
+}
+
+#SEG_DB_PATH: "dodam-med-seg-512-512-128v2"
+#FRAG_DB_PATH: "dodam-med-frag-512-512-128v2"
+#PROJECT: "zetta-research"
+
+
+// Execution parameters
+"@type": "mazepa.execute_on_gcp_with_sqs"
+worker_image: "us-east1-docker.pkg.dev/zetta-research/zutils/zetta_utils:dodam-meshing-15"
+worker_cluster_name: "zutils-x3"
+worker_cluster_region: "us-east1"
+worker_cluster_project: "zetta-research"
+worker_resources: {
+    memory: "18560Mi"
+    //"nvidia.com/gpu": "1"
+}
+worker_replicas: 100
+num_procs: 1
+semaphores_spec: {
+    "read": 8
+    "write": 8
+    "cuda": 0
+    "cpu": 8
+}
+local_test: true // set to `false` to execute remotely
+debug: true // set to `false` to execute remotely
+do_dryrun_estimation: false
+
+target: {
+    "@type": "build_make_shards_flow"
+    segmentation_path: #SEG_PATH
+    seg_db: {
+        "@type": "build_datastore_layer"
+        namespace: #SEG_DB_PATH
+        project: #PROJECT
+    }
+    frag_db: {
+        "@type": "build_datastore_layer"
+        namespace: #FRAG_DB_PATH
+        project: #PROJECT
+    }
+    num_lods: 5
+    min_shards: 48
+    num_shard_no_tasks: 1024
+}
diff --git a/tests/unit/geometry/test_bbox.py b/tests/unit/geometry/test_bbox.py
index 984d6f68c..8827f3cee 100644
--- a/tests/unit/geometry/test_bbox.py
+++ b/tests/unit/geometry/test_bbox.py
@@ -1,4 +1,6 @@
 # pylint: disable=missing-docstring
+from typing import Sequence
+
 import pytest
 
 from zetta_utils.geometry import BBox3D, Vec3D
@@ -223,6 +225,74 @@ def test_pad_exc(bbox: BBox3D, pad, resolution: Vec3D, expected_exc):
         bbox.padded(pad=pad, resolution=resolution)
 
 
+@pytest.mark.parametrize(
+    "bbox, num_splits, expected",
+    [
+        [
+            BBox3D(bounds=((0, 0), (0, 0), (0, 0))),
+            (1, 2, 3),
+            [
+                BBox3D(bounds=((0, 0), (0, 0), (0, 0))),
+                BBox3D(bounds=((0, 0), (0, 0), (0, 0))),
+                BBox3D(bounds=((0, 0), (0, 0), (0, 0))),
+                BBox3D(bounds=((0, 0), (0, 0), (0, 0))),
+                BBox3D(bounds=((0, 0), (0, 0), (0, 0))),
+                BBox3D(bounds=((0, 0), (0, 0), (0, 0))),
+            ],
+        ],
+        [
+            BBox3D(bounds=((0, 3), (0, 4), (0, 5))),
+            (3, 1, 5),
+            [
+                BBox3D(bounds=((0, 1), (0, 4), (0, 1))),
+                BBox3D(bounds=((0, 1), (0, 4), (1, 2))),
+                BBox3D(bounds=((0, 1), (0, 4), (2, 3))),
+                BBox3D(bounds=((0, 1), (0, 4), (3, 4))),
+                BBox3D(bounds=((0, 1), (0, 4), (4, 5))),
+                BBox3D(bounds=((1, 2), (0, 4), (0, 1))),
+                BBox3D(bounds=((1, 2), (0, 4), (1, 2))),
+                BBox3D(bounds=((1, 2), (0, 4), (2, 3))),
+                BBox3D(bounds=((1, 2), (0, 4), (3, 4))),
+                BBox3D(bounds=((1, 2), (0, 4), (4, 5))),
+                BBox3D(bounds=((2, 3), (0, 4), (0, 1))),
+                BBox3D(bounds=((2, 3), (0, 4), (1, 2))),
+                BBox3D(bounds=((2, 3), (0, 4), (2, 3))),
+                BBox3D(bounds=((2, 3), (0, 4), (3, 4))),
+                BBox3D(bounds=((2, 3), (0, 4), (4, 5))),
+            ],
+        ],
+        [
+            BBox3D(bounds=((0, 2), (-3, 0), (0, 0))),
+            (2, 2, 1),
+            [
+                BBox3D(bounds=((0, 1), (-3, -1.5), (0, 0))),
+                BBox3D(bounds=((0, 1), (-1.5, 0), (0, 0))),
+                BBox3D(bounds=((1, 2), (-3, -1.5), (0, 0))),
+                BBox3D(bounds=((1, 2), (-1.5, 0), (0, 0))),
+            ],
+        ],
+    ],
+)
+def test_split(bbox: BBox3D, num_splits: Sequence[int], expected: Sequence[BBox3D]):
+    result = bbox.split(num_splits=num_splits)
+    assert result == expected
+
+
+@pytest.mark.parametrize(
+    "bbox, num_splits, expected_exc",
+    [
+        [
+            BBox3D(bounds=((0, 0), (0, 0), (0, 0))),
+            (1, 3, 5, 3),
+            ValueError,
+        ],
+    ],
+)
+def test_num_splits_exc(bbox: BBox3D, num_splits: Sequence[int], expected_exc):
+    with pytest.raises(expected_exc):
+        bbox.split(num_splits=num_splits)
+
+
 @pytest.mark.parametrize(
     "bbox, offset, resolution, expected",
     [
diff --git a/zetta_utils/geometry/bbox.py b/zetta_utils/geometry/bbox.py
index f86f9368f..76197d9ea 100644
--- a/zetta_utils/geometry/bbox.py
+++ b/zetta_utils/geometry/bbox.py
@@ -1,6 +1,7 @@
 # pylint: disable=missing-docstring, no-else-raise
 from __future__ import annotations
 
+from itertools import product
 from math import floor
 from typing import Literal, Optional, Sequence, Union, cast
 
@@ -275,6 +276,33 @@ def padded(
 
         return result
 
+    def split(
+        self,
+        num_splits: Sequence[int],
+    ) -> list[BBox3D]:
+        """Create a list of bounding boxes formed by splitting this bounding box
+        evenly into ``num_splits`` pieces along each dimension.
+
+        :param num_splits: How many bounding boxes to divide into along each
+            dimension.
+        :return: List of split bounding boxes.
+
+        """
+        if len(num_splits) != 3:
+            raise ValueError("Number of splits must be 3-dimensional.")
+
+        num_splits = Vec3D(*num_splits)
+        stride = self.shape / num_splits
+        splits: list[Vec3D] = [Vec3D(*k) for k in product(*(range(n) for n in num_splits))]
+        return [
+            BBox3D.from_coords(
+                start_coord=self.start + split * stride,
+                end_coord=self.start + (split + 1) * stride,
+                unit=self.unit,
+            )
+            for split in splits
+        ]
+
     def translated(
         self,
         offset: Sequence[float],
diff --git a/zetta_utils/layer/volumetric/backend.py b/zetta_utils/layer/volumetric/backend.py
index e48c1b8bb..784e41a06 100644
--- a/zetta_utils/layer/volumetric/backend.py
+++ b/zetta_utils/layer/volumetric/backend.py
@@ -66,6 +66,10 @@ def get_chunk_size(self, resolution: Vec3D) -> Vec3D[int]:
     def get_dataset_size(self, resolution: Vec3D) -> Vec3D[int]:
         ...
 
+    @abstractmethod
+    def get_bounds(self, resolution: Vec3D) -> VolumetricIndex:
+        ...
+
     """
     TODO: Turn this into a ParamSpec.
     The .with_changes for VolumetricBackend
diff --git a/zetta_utils/layer/volumetric/constant/backend.py b/zetta_utils/layer/volumetric/constant/backend.py
index f838b30c5..2ed0941b0 100644
--- a/zetta_utils/layer/volumetric/constant/backend.py
+++ b/zetta_utils/layer/volumetric/constant/backend.py
@@ -105,6 +105,9 @@ def get_chunk_size(self, resolution: Vec3D) -> Vec3D[int]:  # pragma: no cover
     def get_dataset_size(self, resolution: Vec3D) -> Vec3D[int]:  # pragma: no cover
         return Vec3D[int](0, 0, 0)
 
+    def get_bounds(self, resolution: Vec3D) -> VolumetricIndex:  # pragma: no cover
+        return VolumetricIndex.from_coords((0, 0, 0), (0, 0, 0), Vec3D[int](1, 1, 1))
+
     def get_chunk_aligned_index(
         self, idx: VolumetricIndex, mode: Literal["expand", "shrink", "round"]
     ) -> VolumetricIndex:
diff --git a/zetta_utils/layer/volumetric/index.py b/zetta_utils/layer/volumetric/index.py
index 2d2f31804..0e6678fb7 100644
--- a/zetta_utils/layer/volumetric/index.py
+++ b/zetta_utils/layer/volumetric/index.py
@@ -64,6 +64,17 @@ def cropped(self, crop: Sequence[int]) -> VolumetricIndex:
             allow_slice_rounding=self.allow_slice_rounding,
         )
 
+    def split(self, num_splits: Sequence[int]) -> list[VolumetricIndex]:
+        return [
+            VolumetricIndex(
+                bbox=split_bbox,
+                resolution=self.resolution,
+                chunk_id=self.chunk_id,
+                allow_slice_rounding=self.allow_slice_rounding,
+            )
+            for split_bbox in self.bbox.split(num_splits)
+        ]
+
     def translated(self, offset: Sequence[float]) -> VolumetricIndex:
         return VolumetricIndex(
             bbox=self.bbox.translated(offset=offset, resolution=self.resolution),
diff --git a/zetta_utils/layer/volumetric/layer_set/backend.py b/zetta_utils/layer/volumetric/layer_set/backend.py
index 5dc5e1ce6..83fa95077 100644
--- a/zetta_utils/layer/volumetric/layer_set/backend.py
+++ b/zetta_utils/layer/volumetric/layer_set/backend.py
@@ -150,6 +150,17 @@ def get_dataset_size(self, resolution: Vec3D) -> Vec3D[int]:  # pragma: no cover
         )
         return list(dataset_sizes.values())[0]
 
+    def get_bounds(self, resolution: Vec3D) -> VolumetricIndex:  # pragma: no cover
+        dataset_bounds = {
+            k: v.backend.get_bounds(resolution=resolution) for k, v in self.layers.items()
+        }
+        if not len(set(dataset_bounds.values())) == 1:
+            raise ValueError(
+                "Cannot determine consistent `bounds` for the "
+                f"volumetric layer set backend. Got: {dataset_bounds}"
+            )
+        return list(dataset_bounds.values())[0]
+
     def get_chunk_aligned_index(
         self, idx: VolumetricIndex, mode: Literal["expand", "shrink"]
     ) -> VolumetricIndex:  # pragma: no cover
@@ -159,7 +170,7 @@ def get_chunk_aligned_index(
         self, idx: VolumetricIndex, mode: Literal["expand", "shrink"]
     ) -> VolumetricIndex:  # pragma: no cover
         chunk_aligned_indexs = {
             k: v.backend.get_chunk_aligned_index(idx=idx, mode=mode) for k, v in self.layers.items()
         }
         if not len(set(chunk_aligned_indexs.values())) == 1:
             raise ValueError(
-                "Cannot determine consistent `get_chunk_aligned_index` for the "
+                "Cannot determine consistent `chunk_aligned_index` for the "
                 f"volumetric layer set backend. Got: {chunk_aligned_indexs}"
             )
         return list(chunk_aligned_indexs.values())[0]
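
A minimal usage sketch of the new split API, assuming only the signatures added in this diff (the values below are taken from the parametrized cases in test_bbox.py):

    from zetta_utils.geometry import BBox3D

    # Splitting a 3 x 4 x 5 box into a (3, 1, 5) grid yields 15 sub-boxes of
    # shape 1 x 4 x 1, ordered x-major (itertools.product order).
    bbox = BBox3D(bounds=((0, 3), (0, 4), (0, 5)))
    pieces = bbox.split(num_splits=(3, 1, 5))
    assert len(pieces) == 15
    assert pieces[0] == BBox3D(bounds=((0, 1), (0, 4), (0, 1)))

    # num_splits must have exactly three entries; anything else raises ValueError.
    try:
        bbox.split(num_splits=(1, 3, 5, 3))
    except ValueError:
        pass

VolumetricIndex.split applies the same splitting to its bbox and re-wraps each piece with the parent index's resolution, chunk_id, and allow_slice_rounding, so the same call pattern applies at the index level.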