diff --git a/specs/nico/training/aced_misd/preprocess/01_gen_warp_fields.cue b/specs/nico/training/aced_misd/preprocess/01_gen_warp_fields.cue new file mode 100644 index 000000000..52f114b8a --- /dev/null +++ b/specs/nico/training/aced_misd/preprocess/01_gen_warp_fields.cue @@ -0,0 +1,260 @@ +import "math" +import "list" + +#BASE_PATH: "gs://zetta-research-nico/encoder/" +#TGT_IMG_PATH: #BASE_PATH + "datasets/" +#WARPED_SRC_IMG_PATH: #BASE_PATH + "pairwise_aligned/" // + k + "warped_enc/" +#PERLIN_FIELD_PATH: #BASE_PATH + "misd/misalignment_fields/" + +#DATASETS: { + "microns_pinky": { + "contiguous": true + "bounds": [[0, 262144], [0, 131072], [0, 10240]] + "resolution": [32, 32, 40] + } + "microns_basil": { + "contiguous": true + "bounds": [[0, 819200], [0, 983040], [0, 400]] + "resolution": [32, 32, 40] + }, + "microns_minnie": { + "contiguous": false + "bounds": [[0, 1703936], [0, 1441792], [0, 320]] + "resolution": [32, 32, 40] + }, + "microns_interneuron": { + "contiguous": false + "bounds": [[0, 720896], [0, 720896], [0, 1280]] + "resolution": [32, 32, 40] + }, + "aibs_v1dd": { + "contiguous": false + "bounds": [[0.0, 1231667.2], [0.0, 834355.2], [0.0, 1080.0]] + "resolution": [38.8, 38.8, 45.0] + }, + "kim_n2da": { + "contiguous": true + "bounds": [[0, 32768], [0, 32768], [0, 31050]] + "resolution": [32, 32, 50] + }, + "kim_pfc2022": { + "contiguous": true + "bounds": [[0, 229376], [0, 196608], [0, 7320]] + "resolution": [32, 32, 40] + }, + "kronauer_cra9": { + "contiguous": true + "bounds": [[0, 393216], [0, 327680], [0, 588]] + "resolution": [32, 32, 42] + }, + "kubota_001": { + "contiguous": true + "bounds": [[0, 204800], [0, 204800], [0, 12000]] + "resolution": [40, 40, 40] + }, + "lee_fanc": { + "contiguous": false + "bounds": [[0.0, 352256.0], [0.0, 951091.2], [0.0, 2700.0]] + "resolution": [34.4, 34.4, 45.0] + }, + "lee_banc": { + "contiguous": false + "bounds": [[0, 819200], [0, 1015808], [0, 900]] + "resolution": [32, 32, 45] + }, + "lee_ppc": { + "contiguous": true + "bounds": [[0, 98304], [0, 98304], [0, 36400]] + "resolution": [32, 32, 40] + }, + "lee_mosquito": { + "contiguous": false + "bounds": [[0, 704512], [0, 450560], [0, 2240]] + "resolution": [32, 32, 40] + }, + "lichtman_zebrafish": { + "contiguous": false + "bounds": [[0, 294912], [0, 393216], [0, 4560]] + "resolution": [32, 32, 30] + }, + "prieto_godino_larva": { + "contiguous": true + "bounds": [[0, 134976], [0, 144992], [0, 14400]] + "resolution": [32, 32, 32] + }, + "fafb_v15": { + "contiguous": false + "bounds": [[0, 884736], [0, 393216], [0, 2000]] + "resolution": [32, 32, 40] + }, + "lichtman_h01": { + "contiguous": false + "bounds": [[0, 3440640], [0, 1933312], [0, 198]] + "resolution": [32, 32, 33] + }, + "janelia_hemibrain": { + "contiguous": true + "bounds": [[0, 317824], [0, 331168], [0, 3296]] + "resolution": [32, 32, 32] + }, + "janelia_manc": { + "contiguous": false + "bounds": [[0, 262144], [0, 360448], [0, 5952]] + "resolution": [32, 32, 32] + }, + "nguyen_thomas_2022": { + "contiguous": true + "bounds": [[0, 998400], [0, 921600], [0, 400]] + "resolution": [32, 32, 40] + }, + "mulcahy_2022_16h": { + "contiguous": true + "bounds": [[0, 243712], [0, 73728], [0, 14700]] + "resolution": [32, 32, 30] + }, + "wildenberg_2021_vta_dat12a": { + "contiguous": true + "bounds": [[0, 82080], [0, 85184], [0, 7640]] + "resolution": [32, 32, 40] + }, + "bumbarber_2013": { + "contiguous": true + "bounds": [[0.0, 63897.6], [0.0, 63897.6], [0.0, 102400.0]] + "resolution": [31.2, 31.2, 50.0] + }, + "wilson_2019_p3": { + "contiguous": true + "bounds": [[0, 163840], [0, 229376], [0, 7020]] + "resolution": [32, 32, 30] + }, + "ishibashi_2021_em1": { + "contiguous": true + "bounds": [[0, 24576], [0, 16384], [0, 4544]] + "resolution": [32, 32, 32] + }, + "ishibashi_2021_em2": { + "contiguous": true + "bounds": [[0, 26624], [0, 18432], [0, 5376]] + "resolution": [32, 32, 32] + }, + "templier_2019_wafer1": { + "contiguous": true + "bounds": [[0, 294912], [0, 229376], [0, 6500]] + "resolution": [32, 32, 50] + }, + "templier_2019_wafer3": { + "contiguous": true + "bounds": [[0, 229376], [0, 196608], [0, 9750]] + "resolution": [32, 32, 50] + }, + "lichtman_octopus2022": { + "contiguous": true + "bounds": [[0, 229376], [0, 360448], [0, 3180]] + "resolution": [32, 32, 30] + } +} + + +#DST_INFO_CHUNK_SIZE: [2048, 2048, 1] +#PERLIN_FIELD_DS_FACTOR: math.Pow(2, 3) +#FIELD_INFO_OVERRIDE: { + _dataset_bounds: _ + _dst_resolution: _ + type: "image" + data_type: "float32", + num_channels: 2, + scales: [ + { + let vx_res = _dst_resolution + let ds_offset = [ for j in [0, 1, 2] { + _dataset_bounds[j][0] / _dst_resolution[j] // technically should be floor + }] + let ds_size = [ for j in [0, 1, 2] { + math.Ceil((_dataset_bounds[j][1] - _dataset_bounds[j][0]) / _dst_resolution[j]) + }] + + chunk_sizes: [[ for j in [0, 1, 2] {list.Min([#DST_INFO_CHUNK_SIZE[j], ds_size[j]])}]] + resolution: vx_res + encoding: "zfpc" + zfpc_correlated_dims: [true, true, false, false] + zfpc_tolerance: 0.001953125 + key: "\(vx_res[0])_\(vx_res[1])_\(vx_res[2])" + voxel_offset: ds_offset + size: ds_size + } + ], + +} + + + +#MAX_DISP: 20 +#MEDIAN_DISP: 7.5 +#PERLIN_NOISE_TEMPLATE: { + _bounds: _ + let vx_res = dst_resolution + let x_mult = math.Ceil(((_bounds[0][1] - _bounds[0][0]) / vx_res[0]) / 2048) + let y_mult = math.Ceil(((_bounds[1][1] - _bounds[1][0]) / vx_res[1]) / 2048) + "@type": "build_subchunkable_apply_flow" + op: { + "@type": "VolumetricCallableOperation" + fn: { + "@type": "gen_biased_perlin_noise_field" + "@mode": "partial" + shape: [2, x_mult * 2048, y_mult * 2048, 1] + res: [ x_mult * 2, y_mult * 2 ] + max_displacement_px: #MAX_DISP / #PERLIN_FIELD_DS_FACTOR + field_magn_thr_px: #MEDIAN_DISP / #PERLIN_FIELD_DS_FACTOR + octaves: 8 + device: "cpu" + } + crop_pad: [0, 0, 0] + } + dst_resolution: _ + skip_intermediaries: true + processing_chunk_sizes: [[x_mult * 2048, y_mult * 2048, 1]] + processing_crop_pads: [[0, 0, 0]] + expand_bbox_resolution: true + bbox: { + "@type": "BBox3D.from_coords", + start_coord: [_bounds[0][0], _bounds[1][0], _bounds[2][0]] + end_coord: [_bounds[0][1], _bounds[1][1], _bounds[2][1]] + } + dst: { + "@type": "build_cv_layer" + path: _ + info_field_overrides: #FIELD_INFO_OVERRIDE & { + _dataset_bounds: _bounds + _dst_resolution: dst_resolution + } + } +} + + +"@type": "mazepa.execute_on_gcp_with_sqs" +worker_image: "us.gcr.io/zetta-research/zetta_utils:nico_py3.9_20231118" +worker_resources: { + memory: "10560Mi" +} +worker_replicas: 100 +batch_gap_sleep_sec: 0.1 +do_dryrun_estimation: true +local_test: false +worker_cluster_project: "zetta-research" +worker_cluster_region: "us-east1" +worker_cluster_name: "zutils-x3" +target: { + "@type": "mazepa.concurrent_flow" + stages: [ + for key, dataset in #DATASETS { + #PERLIN_NOISE_TEMPLATE & { + _bounds: dataset.bounds, + dst: path: #PERLIN_FIELD_PATH + key + "/raw_perlin" + + let ds_factor = [#PERLIN_FIELD_DS_FACTOR, #PERLIN_FIELD_DS_FACTOR, 1] + let res = [ for j in [0, 1, 2] {dataset.resolution[j] * ds_factor[j]} ] + dst_resolution: res + } + } + ] +} \ No newline at end of file diff --git a/specs/nico/training/aced_misd/preprocess/02_encode_aligned.cue b/specs/nico/training/aced_misd/preprocess/02_encode_aligned.cue new file mode 100644 index 000000000..9b7fe08f8 --- /dev/null +++ b/specs/nico/training/aced_misd/preprocess/02_encode_aligned.cue @@ -0,0 +1,322 @@ +import "math" +import "list" + +#BASE_PATH: "gs://zetta-research-nico/encoder/" +#TGT_IMG_PATH: #BASE_PATH + "datasets/" // + k +#WARPED_SRC_IMG_PATH: #BASE_PATH + "pairwise_aligned/" // + k + "/warped_img" +#DST_TGT_ENC_PATH: #BASE_PATH + "pairwise_aligned/" // + k + "/tgt_enc_2023" +#DST_WARPED_SRC_ENC_PATH: #BASE_PATH + "pairwise_aligned/" // + k + "/warped_enc_2023" + +#DATASETS: { + "microns_pinky": { + "contiguous": true + "bounds": [[0, 262144], [0, 131072], [0, 10240]] + "resolution": [32, 32, 40] + } + "microns_basil": { + "contiguous": true + "bounds": [[0, 819200], [0, 983040], [0, 400]] + "resolution": [32, 32, 40] + }, + "microns_minnie": { + "contiguous": false + "bounds": [[0, 1703936], [0, 1441792], [0, 320]] + "resolution": [32, 32, 40] + }, + "microns_interneuron": { + "contiguous": false + "bounds": [[0, 720896], [0, 720896], [0, 1280]] + "resolution": [32, 32, 40] + }, + "aibs_v1dd": { + "contiguous": false + "bounds": [[0.0, 1231667.2], [0.0, 834355.2], [0.0, 1080.0]] + "resolution": [38.8, 38.8, 45.0] + }, + "kim_n2da": { + "contiguous": true + "bounds": [[0, 32768], [0, 32768], [0, 31050]] + "resolution": [32, 32, 50] + }, + "kim_pfc2022": { + "contiguous": true + "bounds": [[0, 229376], [0, 196608], [0, 7320]] + "resolution": [32, 32, 40] + }, + "kronauer_cra9": { + "contiguous": true + "bounds": [[0, 393216], [0, 327680], [0, 588]] + "resolution": [32, 32, 42] + }, + "kubota_001": { + "contiguous": true + "bounds": [[0, 204800], [0, 204800], [0, 12000]] + "resolution": [40, 40, 40] + }, + "lee_fanc": { + "contiguous": false + "bounds": [[0.0, 352256.0], [0.0, 951091.2], [0.0, 2700.0]] + "resolution": [34.4, 34.4, 45.0] + }, + "lee_banc": { + "contiguous": false + "bounds": [[0, 819200], [0, 1015808], [0, 900]] + "resolution": [32, 32, 45] + }, + "lee_ppc": { + "contiguous": true + "bounds": [[0, 98304], [0, 98304], [0, 36400]] + "resolution": [32, 32, 40] + }, + "lee_mosquito": { + "contiguous": false + "bounds": [[0, 704512], [0, 450560], [0, 2240]] + "resolution": [32, 32, 40] + }, + "lichtman_zebrafish": { + "contiguous": false + "bounds": [[0, 294912], [0, 393216], [0, 4560]] + "resolution": [32, 32, 30] + }, + "prieto_godino_larva": { + "contiguous": true + "bounds": [[0, 134976], [0, 144992], [0, 14400]] + "resolution": [32, 32, 32] + }, + "fafb_v15": { + "contiguous": false + "bounds": [[0, 884736], [0, 393216], [0, 2000]] + "resolution": [32, 32, 40] + }, + "lichtman_h01": { + "contiguous": false + "bounds": [[0, 3440640], [0, 1933312], [0, 198]] + "resolution": [32, 32, 33] + }, + "janelia_hemibrain": { + "contiguous": true + "bounds": [[0, 317824], [0, 331168], [0, 3296]] + "resolution": [32, 32, 32] + }, + "janelia_manc": { + "contiguous": false + "bounds": [[0, 262144], [0, 360448], [0, 5952]] + "resolution": [32, 32, 32] + }, + "nguyen_thomas_2022": { + "contiguous": true + "bounds": [[0, 998400], [0, 921600], [0, 400]] + "resolution": [32, 32, 40] + }, + "mulcahy_2022_16h": { + "contiguous": true + "bounds": [[0, 243712], [0, 73728], [0, 14700]] + "resolution": [32, 32, 30] + }, + "wildenberg_2021_vta_dat12a": { + "contiguous": true + "bounds": [[0, 82080], [0, 85184], [0, 7640]] + "resolution": [32, 32, 40] + }, + "bumbarber_2013": { + "contiguous": true + "bounds": [[0.0, 63897.6], [0.0, 63897.6], [0.0, 102400.0]] + "resolution": [31.2, 31.2, 50.0] + }, + "wilson_2019_p3": { + "contiguous": true + "bounds": [[0, 163840], [0, 229376], [0, 7020]] + "resolution": [32, 32, 30] + }, + "ishibashi_2021_em1": { + "contiguous": true + "bounds": [[0, 24576], [0, 16384], [0, 4544]] + "resolution": [32, 32, 32] + }, + "ishibashi_2021_em2": { + "contiguous": true + "bounds": [[0, 26624], [0, 18432], [0, 5376]] + "resolution": [32, 32, 32] + }, + "templier_2019_wafer1": { + "contiguous": true + "bounds": [[0, 294912], [0, 229376], [0, 6500]] + "resolution": [32, 32, 50] + }, + "templier_2019_wafer3": { + "contiguous": true + "bounds": [[0, 229376], [0, 196608], [0, 9750]] + "resolution": [32, 32, 50] + }, + "lichtman_octopus2022": { + "contiguous": true + "bounds": [[0, 229376], [0, 360448], [0, 3180]] + "resolution": [32, 32, 30] + } +} + + +#DST_INFO_CHUNK_SIZE: [1024, 1024, 1] +#MAX_TASK_SIZE: [8192, 8192, 1] + +#ENC_INFO_OVERRIDE: { + _dataset_bounds: _ + _highest_resolution: _ + type: "image" + data_type: "int8" + num_channels: 1 + scales: [ + for i in list.Range(0, 4, 1) { + let res_factor = [math.Pow(2, i), math.Pow(2, i), 1] + let vx_res = [ for j in [0, 1, 2] {_highest_resolution[j] * res_factor[j]}] + let ds_offset = [ for j in [0, 1, 2] { + _dataset_bounds[j][0] / vx_res[j] // technically should be floor, but it's + }] + let ds_size = [ for j in [0, 1, 2] { + math.Ceil((_dataset_bounds[j][1] - _dataset_bounds[j][0]) / vx_res[j]) + }] + + chunk_sizes: [[ for j in [0, 1, 2] {list.Min([#DST_INFO_CHUNK_SIZE[j], ds_size[j]])}]] + resolution: vx_res + encoding: "raw" + key: "\(vx_res[0])_\(vx_res[1])_\(vx_res[2])" + voxel_offset: ds_offset + size: ds_size + }, + ] +} + +#MODELS: [ + { + path: "gs://alignment_models/general_encoders_2023/32_32_C1/2023-11-20.static-2.0.1-model.jit" + res_change_mult: [1, 1, 1] + }, + { + path: "gs://alignment_models/general_encoders_2023/32_64_C1/2023-11-20.static-2.0.1-model.jit" + res_change_mult: [2, 2, 1] + }, + { + path: "gs://alignment_models/general_encoders_2023/32_128_C1/2023-11-20.static-2.0.1-model.jit" + res_change_mult: [4, 4, 1] + }, + { + path: "gs://alignment_models/general_encoders_2023/32_256_C1/2023-11-20.static-2.0.1-model.jit" + res_change_mult: [8, 8, 1] + } +] + +#ENCODE_TEMPLATE: { + _bounds: _ + _high_resolution: [number, number, number] + _layer_name: _ + let max_chunk_size = [ + list.Min([#MAX_TASK_SIZE[0], math.Ceil((_bounds[0][1] - _bounds[0][0]) / #DST_INFO_CHUNK_SIZE[0] / dst_resolution[0]) * #DST_INFO_CHUNK_SIZE[0]]), + list.Min([#MAX_TASK_SIZE[1], math.Ceil((_bounds[1][1] - _bounds[1][0]) / #DST_INFO_CHUNK_SIZE[1] / dst_resolution[1]) * #DST_INFO_CHUNK_SIZE[1]]), + 1 + ] + + "@type": "build_subchunkable_apply_flow" + op: { + "@type": "VolumetricCallableOperation" + operation_name: _layer_name + fn: { + "@type": "BaseEncoder" + model_path: string + } | { + "@type": "BaseCoarsener" + model_path: string + tile_pad_in: int + tile_size: int + ds_factor: int + output_channels: 1 + } + crop_pad: [16, 16, 0] + res_change_mult: [int, int, int] + } + dst_resolution: [number, number, number] + processing_chunk_sizes: [max_chunk_size, [1024, 1024, 1]] + processing_crop_pads: [[0, 0, 0], [16,16,0]] + expand_bbox_resolution: true + skip_intermediaries: true + bbox: { + "@type": "BBox3D.from_coords", + start_coord: [_bounds[0][0], _bounds[1][0], _bounds[2][0]] + end_coord: [_bounds[0][1], _bounds[1][1], _bounds[2][1]] + } + op_kwargs: { + src: { + "@type": "build_cv_layer" + path: _ + } + } + dst: { + "@type": "build_cv_layer" + path: _ + info_field_overrides: #ENC_INFO_OVERRIDE & { + _dataset_bounds: _bounds + _highest_resolution: _high_resolution + } + on_info_exists: "overwrite" + } +} + + + +"@type": "mazepa.execute_on_gcp_with_sqs" +worker_image: "us.gcr.io/zetta-research/zetta_utils:nico_py3.9_20231118" +worker_resources: { + "nvidia.com/gpu": 1 +} +worker_replicas: 200 +batch_gap_sleep_sec: 0.1 +do_dryrun_estimation: true +local_test: false +worker_cluster_project: "zetta-research" +worker_cluster_region: "us-east1" +worker_cluster_name: "zutils-x3" +target: { + "@type": "mazepa.concurrent_flow" + stages: [ + for img_source in ["tgt", "warped_src"] { + "@type": "mazepa.concurrent_flow" + stages: [ + for key, dataset in #DATASETS { + "@type": "mazepa.concurrent_flow" + stages: [ + for i in list.Range(0, 4, 1) { + #ENCODE_TEMPLATE & { + _bounds: dataset.bounds, + _high_resolution: dataset.resolution + _layer_name: "Model \(i)" + let _ds_factor = #MODELS[i].res_change_mult + let res = [ for j in [0, 1, 2] {dataset.resolution[j] * _ds_factor[j]} ] + if i == 0 { + op: fn: "@type": "BaseEncoder" + } + if i > 0 { + op: fn: { + "@type": "BaseCoarsener" + tile_pad_in: #ENCODE_TEMPLATE.op.crop_pad[0] * _ds_factor[0] + tile_size: 1024 + ds_factor: _ds_factor[0] + } + } + op: fn: model_path: #MODELS[i].path + op: res_change_mult: _ds_factor + dst_resolution: res + if img_source == "tgt" { + op_kwargs: src: path: #TGT_IMG_PATH + key + dst: path: #DST_TGT_ENC_PATH + key + "/tgt_enc_2023" + } + if img_source == "warped_src" { + op_kwargs: src: path: #WARPED_SRC_IMG_PATH + key + "/warped_img" + dst: path: #DST_WARPED_SRC_ENC_PATH + key + "/warped_enc_2023" + } + } + } + ] + } + ] + } + ] +} \ No newline at end of file diff --git a/specs/nico/training/aced_misd/preprocess/03_optimize_warp_fields.cue b/specs/nico/training/aced_misd/preprocess/03_optimize_warp_fields.cue new file mode 100644 index 000000000..b2f2bd060 --- /dev/null +++ b/specs/nico/training/aced_misd/preprocess/03_optimize_warp_fields.cue @@ -0,0 +1,847 @@ +import "math" +import "list" + +#BASE_PATH: "gs://zetta-research-nico/encoder/" +// #TGT_IMG_PATH: #BASE_PATH + "datasets/" // + k +#ORIGINAL_WARPED_SRC_IMG_PATH: #BASE_PATH + "pairwise_aligned/" // + k + "/warped_img" +#TGT_ENC_PATH: #BASE_PATH + "pairwise_aligned/" // + k + "/tgt_enc_2023" +#WARPED_SRC_ENC_PATH: #BASE_PATH + "pairwise_aligned/" // + k + "/warped_enc_2023" +#PERLIN_FIELD_PATH: #BASE_PATH + "misd/misalignment_fields/" // + k + "/raw_perlin" +#DST_FIELD_PATH: #BASE_PATH + "misd/misalignment_fields/" // + k + "/optimized_perlin" | "/no_perlin" + "/z\(_z_offset)" + +#DST_WARPED_SRC_IMG_PATH: #BASE_PATH + "misd/img/" // + k + "/good_alignment" | "/bad_alignment" + "/z\(_z_offset)" +#DST_WARPED_SRC_ENC_PATH: #BASE_PATH + "misd/enc/" // + k + "/good_alignment" | "/bad_alignment" + "/z\(_z_offset)" + + +#DATASETS: { + "microns_pinky": { + "contiguous": true + "bounds": [[0, 262144], [0, 131072], [0, 10240]] + "resolution": [32, 32, 40] + } + "microns_basil": { + "contiguous": true + "bounds": [[0, 819200], [0, 983040], [0, 400]] + "resolution": [32, 32, 40] + }, + // // "microns_minnie": { + // // "contiguous": false + // // "bounds": [[0, 1703936], [0, 1441792], [0, 320]] + // // "resolution": [32, 32, 40] + // // }, + // // "microns_interneuron": { + // // "contiguous": false + // // "bounds": [[0, 720896], [0, 720896], [0, 1280]] + // // "resolution": [32, 32, 40] + // // }, + // // "aibs_v1dd": { + // // "contiguous": false + // // "bounds": [[0.0, 1231667.2], [0.0, 834355.2], [0.0, 1080.0]] + // // "resolution": [38.8, 38.8, 45.0] + // // }, + "kim_n2da": { + "contiguous": true + "bounds": [[0, 32768], [0, 32768], [0, 31050]] + "resolution": [32, 32, 50] + }, + "kim_pfc2022": { + "contiguous": true + "bounds": [[0, 229376], [0, 196608], [0, 7320]] + "resolution": [32, 32, 40] + }, + "kronauer_cra9": { + "contiguous": true + "bounds": [[0, 393216], [0, 327680], [0, 588]] + "resolution": [32, 32, 42] + }, + "kubota_001": { + "contiguous": true + "bounds": [[0, 204800], [0, 204800], [0, 12000]] + "resolution": [40, 40, 40] + }, + // // "lee_fanc": { + // // "contiguous": false + // // "bounds": [[0.0, 352256.0], [0.0, 951091.2], [0.0, 2700.0]] + // // "resolution": [34.4, 34.4, 45.0] + // // }, + // // "lee_banc": { + // // "contiguous": false + // // "bounds": [[0, 819200], [0, 1015808], [0, 900]] + // // "resolution": [32, 32, 45] + // // }, + "lee_ppc": { + "contiguous": true + "bounds": [[0, 98304], [0, 98304], [0, 36400]] + "resolution": [32, 32, 40] + }, + // // "lee_mosquito": { + // // "contiguous": false + // // "bounds": [[0, 704512], [0, 450560], [0, 2240]] + // // "resolution": [32, 32, 40] + // // }, + // // "lichtman_zebrafish": { + // // "contiguous": false + // // "bounds": [[0, 294912], [0, 393216], [0, 4560]] + // // "resolution": [32, 32, 30] + // // }, + "prieto_godino_larva": { + "contiguous": true + "bounds": [[0, 134976], [0, 144992], [0, 14400]] + "resolution": [32, 32, 32] + }, + // // "fafb_v15": { + // // "contiguous": false + // // "bounds": [[0, 884736], [0, 393216], [0, 2000]] + // // "resolution": [32, 32, 40] + // // }, + // // "lichtman_h01": { + // // "contiguous": false + // // "bounds": [[0, 3440640], [0, 1933312], [0, 198]] + // // "resolution": [32, 32, 33] + // // }, + "janelia_hemibrain": { + "contiguous": true + "bounds": [[0, 317824], [0, 331168], [0, 3296]] + "resolution": [32, 32, 32] + }, + // // "janelia_manc": { + // // "contiguous": false + // // "bounds": [[0, 262144], [0, 360448], [0, 5952]] + // // "resolution": [32, 32, 32] + // // }, + "nguyen_thomas_2022": { + "contiguous": true + "bounds": [[0, 998400], [0, 921600], [0, 400]] + "resolution": [32, 32, 40] + }, + "mulcahy_2022_16h": { + "contiguous": true + "bounds": [[0, 243712], [0, 73728], [0, 14700]] + "resolution": [32, 32, 30] + }, + "wildenberg_2021_vta_dat12a": { + "contiguous": true + "bounds": [[0, 82080], [0, 85184], [0, 7640]] + "resolution": [32, 32, 40] + }, + "bumbarber_2013": { + "contiguous": true + "bounds": [[0.0, 63897.6], [0.0, 63897.6], [0.0, 102400.0]] + "resolution": [31.2, 31.2, 50.0] + }, + "wilson_2019_p3": { + "contiguous": true + "bounds": [[0, 163840], [0, 229376], [0, 7020]] + "resolution": [32, 32, 30] + }, + "ishibashi_2021_em1": { + "contiguous": true + "bounds": [[0, 24576], [0, 16384], [0, 4544]] + "resolution": [32, 32, 32] + }, + "ishibashi_2021_em2": { + "contiguous": true + "bounds": [[0, 26624], [0, 18432], [0, 5376]] + "resolution": [32, 32, 32] + }, + "templier_2019_wafer1": { + "contiguous": true + "bounds": [[0, 294912], [0, 229376], [0, 6500]] + "resolution": [32, 32, 50] + }, + "templier_2019_wafer3": { + "contiguous": true + "bounds": [[0, 229376], [0, 196608], [0, 9750]] + "resolution": [32, 32, 50] + }, + "lichtman_octopus2022": { + "contiguous": true + "bounds": [[0, 229376], [0, 360448], [0, 3180]] + "resolution": [32, 32, 30] + } +} + +#MODELS: [ + { + path: "gs://alignment_models/general_encoders_2023/32_32_C1/2023-11-20.static-2.0.1-model.jit" + res_change_mult: [1, 1, 1] + }, + { + path: "gs://alignment_models/general_encoders_2023/32_64_C1/2023-11-20.static-2.0.1-model.jit" + res_change_mult: [2, 2, 1] + }, + { + path: "gs://alignment_models/general_encoders_2023/32_128_C1/2023-11-20.static-2.0.1-model.jit" + res_change_mult: [4, 4, 1] + }, + { + path: "gs://alignment_models/general_encoders_2023/32_256_C1/2023-11-20.static-2.0.1-model.jit" + res_change_mult: [8, 8, 1] + } +] + + +#DST_INFO_CHUNK_SIZE: [2048, 2048, 1] +#MAX_TASK_SIZE: [8192, 8192, 1] +#PERLIN_FIELD_DS_FACTOR: math.Pow(2, 3) + +#STAGE_TMPL: { + _stage_bounds: _ + let max_chunk_size = [ + list.Min([#MAX_TASK_SIZE[0], math.Ceil((_stage_bounds[0][1] - _stage_bounds[0][0]) / #DST_INFO_CHUNK_SIZE[0] / dst_resolution[0]) * #DST_INFO_CHUNK_SIZE[0]]), + list.Min([#MAX_TASK_SIZE[1], math.Ceil((_stage_bounds[1][1] - _stage_bounds[1][0]) / #DST_INFO_CHUNK_SIZE[1] / dst_resolution[1]) * #DST_INFO_CHUNK_SIZE[1]]), + 1 + ] + "@type": "ComputeFieldStage" + dst_resolution: _ + processing_chunk_sizes: [max_chunk_size, [2048, 2048, 1]] + processing_crop_pads: [[0, 0, 0], [64, 64, 0]] + expand_bbox_processing: true + expand_bbox_resolution: true + fn: { + "@type": "align_with_online_finetuner" + "@mode": "partial" + sm: int + num_iter: int + lr: float + } +} + + +#FIELD_INFO_OVERRIDE: { + _dataset_bounds: _ + _highest_resolution: _ + type: "image" + data_type: "float32", + num_channels: 2, + scales: [ + for i in list.Range(0, 3, 1) { + let res_factor = [math.Pow(2, i), math.Pow(2, i), 1] + let vx_res = [ for j in [0, 1, 2] {_highest_resolution[j] * res_factor[j]}] + let ds_offset = [ for j in [0, 1, 2] { + _dataset_bounds[j][0] / vx_res[j] // technically should be floor, but it's 0 anyway + }] + let ds_size = [ for j in [0, 1, 2] { + math.Ceil((_dataset_bounds[j][1] - _dataset_bounds[j][0]) / vx_res[j]) + }] + + chunk_sizes: [[ for j in [0, 1, 2] {list.Min([#DST_INFO_CHUNK_SIZE[j], ds_size[j]])}]] + resolution: vx_res + encoding: "zfpc" + zfpc_correlated_dims: [true, true, false, false] + zfpc_tolerance: 0.001953125 + key: "\(vx_res[0])_\(vx_res[1])_\(vx_res[2])" + voxel_offset: ds_offset + size: ds_size + } + ] +} + +#COMPUTE_FIELD_TEMPLATE: { + _bounds: _ + _dst_resolution: [number, number, number] + _layer_name: _ + _z_offset: int + _use_perlin_field: *false | true + + "@type": "build_compute_field_multistage_flow" + bbox: { + "@type": "BBox3D.from_coords", + start_coord: [_bounds[0][0], _bounds[1][0], _bounds[2][0]] + end_coord: [_bounds[0][1], _bounds[1][1], _bounds[2][1]] + } + stages: [ + #STAGE_TMPL & { + _stage_bounds: _bounds + dst_resolution: [_dst_resolution[0] * 4, _dst_resolution[1] * 4, _dst_resolution[2]] + fn: { + sm: 25 + num_iter: 500 + lr: 0.05 + } + }, + #STAGE_TMPL & { + _stage_bounds: _bounds + dst_resolution: [_dst_resolution[0] * 2, _dst_resolution[1] * 2, _dst_resolution[2]] + fn: { + sm: 25 + num_iter: 300 + lr: 0.1 + } + }, + #STAGE_TMPL & { + _stage_bounds: _bounds + dst_resolution: _dst_resolution + fn: { + sm: 25 + num_iter: 200 + lr: 0.1 + } + }, + ] + + if _z_offset == 2 { + src_offset: [0, 0, 1] // src is already offset by 1 + offset_resolution: _dst_resolution + } + src: { + "@type": "build_cv_layer" + path: #WARPED_SRC_ENC_PATH + _layer_name + "/warped_enc_2023" + } + tgt: { + "@type": "build_cv_layer" + path: #TGT_ENC_PATH + _layer_name + "/tgt_enc_2023" + } + dst: { + "@type": "build_cv_layer" + if _use_perlin_field == true { + path: #PERLIN_FIELD_PATH + _layer_name + "/optimized_perlin/z\(_z_offset)" + } + if _use_perlin_field == false { + path: #PERLIN_FIELD_PATH + _layer_name + "/no_perlin/z\(_z_offset)" + } + info_field_overrides: #FIELD_INFO_OVERRIDE & { + _dataset_bounds: _bounds + _highest_resolution: _dst_resolution + } + on_info_exists: "overwrite" + } + tmp_layer_dir: dst.path + "/tmp" + tmp_layer_factory: { + "@type": "build_cv_layer" + "@mode": "partial" + info_field_overrides: #FIELD_INFO_OVERRIDE & { + _dataset_bounds: _bounds + _highest_resolution: _dst_resolution + } + on_info_exists: "overwrite" + } + if _use_perlin_field { + src_field: { + let ds_factor = [#PERLIN_FIELD_DS_FACTOR, #PERLIN_FIELD_DS_FACTOR, 1] + "@type": "build_cv_layer" + path: #PERLIN_FIELD_PATH + _layer_name + "/raw_perlin" + data_resolution: [ for j in [0, 1, 2] {_dst_resolution[j] * ds_factor[j]} ] + interpolation_mode: "field" + } + } +} + + +#WARP_IMG_TEMPLATE: { + _bounds: _ + _layer_name: _ + _z_offset: int + _use_perlin_field: *false | true + + _src_field_path: _ + _dst_img_path: _ + let max_chunk_size = [ + list.Min([#MAX_TASK_SIZE[0], math.Ceil((_bounds[0][1] - _bounds[0][0]) / #DST_INFO_CHUNK_SIZE[0] / dst_resolution[0]) * #DST_INFO_CHUNK_SIZE[0]]), + list.Min([#MAX_TASK_SIZE[1], math.Ceil((_bounds[1][1] - _bounds[1][0]) / #DST_INFO_CHUNK_SIZE[1] / dst_resolution[1]) * #DST_INFO_CHUNK_SIZE[1]]), + 1 + ] + if _use_perlin_field == true { + _src_field_path: #DST_FIELD_PATH + _layer_name + "/optimized_perlin/z\(_z_offset)" + _dst_img_path: #DST_WARPED_SRC_IMG_PATH + _layer_name + "/bad_alignment/z\(_z_offset)" + } + if _use_perlin_field == false { + _src_field_path: #DST_FIELD_PATH + _layer_name + "/no_perlin/z\(_z_offset)" + _dst_img_path: #DST_WARPED_SRC_IMG_PATH + _layer_name + "/good_alignment/z\(_z_offset)" + } + + "@type": "build_subchunkable_apply_flow" + op: { + "@type": "WarpOperation" + mode: "img" + crop_pad: [256, 256, 0] + } + dst_resolution: _ + processing_chunk_sizes: [max_chunk_size, [2048, 2048, 1]] + processing_crop_pads: [[0, 0, 0], [256, 256, 0]] + skip_intermediaries: true + expand_bbox_processing: true + bbox: { + "@type": "BBox3D.from_coords", + start_coord: [_bounds[0][0], _bounds[1][0], _bounds[2][0]] + end_coord: [_bounds[0][1], _bounds[1][1], _bounds[2][1]] + } + op_kwargs: { + src: { + "@type": "build_cv_layer" + path: #ORIGINAL_WARPED_SRC_IMG_PATH + _layer_name + "/warped_img" + index_procs: [{ + "@type": "VolumetricIndexTranslator" + offset: [0, 0, _z_offset - 1] // src is already offset by 1 + resolution: dst_resolution + }] + } + field: { + "@type": "build_cv_layer" + path: _src_field_path + } + } + dst: { + "@type": "build_cv_layer" + path: _dst_img_path + info_reference_path: op_kwargs.src.path + } +} + +#DOWNSAMPLE_FIELD_TEMPLATE: { + _bounds: _ + _layer_name: _ + _z_offset: int + _use_perlin_field: *false | true + + let max_chunk_size = [ + list.Min([#MAX_TASK_SIZE[0], math.Ceil((_bounds[0][1] - _bounds[0][0]) / #DST_INFO_CHUNK_SIZE[0] / dst_resolution[0]) * #DST_INFO_CHUNK_SIZE[0]]), + list.Min([#MAX_TASK_SIZE[1], math.Ceil((_bounds[1][1] - _bounds[1][0]) / #DST_INFO_CHUNK_SIZE[1] / dst_resolution[1]) * #DST_INFO_CHUNK_SIZE[1]]), + 1 + ] + + "@type": "build_interpolate_flow" + mode: "field" + src_resolution: [number, number, number] + dst_resolution: [src_resolution[0] * 2, src_resolution[1] * 2, src_resolution[2]] + chunk_size: max_chunk_size + bbox: { + "@type": "BBox3D.from_coords", + start_coord: [_bounds[0][0], _bounds[1][0], _bounds[2][0]] + end_coord: [_bounds[0][1], _bounds[1][1], _bounds[2][1]] + } + + _path: _ + if _use_perlin_field == true { + _path: #DST_FIELD_PATH + _layer_name + "/optimized_perlin/z\(_z_offset)" + } + if _use_perlin_field == false { + _path: #DST_FIELD_PATH + _layer_name + "/no_perlin/z\(_z_offset)" + } + src: { + "@type": "build_cv_layer" + path: _path + } + dst: { + "@type": "build_cv_layer" + path: _path + } + +} + +#FIELD_DIFF_TEMPLATE: { + _bounds: _ + _layer_name: _ + _z_offset: int + + let max_chunk_size = [ + list.Min([#MAX_TASK_SIZE[0], math.Ceil((_bounds[0][1] - _bounds[0][0]) / #DST_INFO_CHUNK_SIZE[0] / dst_resolution[0]) * #DST_INFO_CHUNK_SIZE[0]]), + list.Min([#MAX_TASK_SIZE[1], math.Ceil((_bounds[1][1] - _bounds[1][0]) / #DST_INFO_CHUNK_SIZE[1] / dst_resolution[1]) * #DST_INFO_CHUNK_SIZE[1]]), + 1 + ] + + "@type": "build_subchunkable_apply_flow" + fn: { + "@type": "torch.sub", "@mode": "partial" + } + processing_chunk_sizes: [max_chunk_size] + processing_crop_pads: [[0, 0, 0]] + dst_resolution: _ + expand_bbox_resolution: true + skip_intermediaries: true + bbox: { + "@type": "BBox3D.from_coords", + start_coord: [_bounds[0][0], _bounds[1][0], _bounds[2][0]] + end_coord: [_bounds[0][1], _bounds[1][1], _bounds[2][1]] + } + op_kwargs: { + input: { + "@type": "build_cv_layer" + path: #DST_FIELD_PATH + _layer_name + "/optimized_perlin/z\(_z_offset)" + } + other: { + "@type": "build_cv_layer" + path: #DST_FIELD_PATH + _layer_name + "/no_perlin/z\(_z_offset)" + } + } + dst: { + "@type": "build_cv_layer" + path: #DST_FIELD_PATH + _layer_name + "/displacements/z\(_z_offset)" + info_reference_path: #TGT_ENC_PATH + _layer_name + "/tgt_enc_2023" + info_field_overrides: { + data_type: "uint8" + } + on_info_exists: "overwrite" + write_procs: [ + { + "@type": "lambda" + lambda_str: "lambda data: (data.norm(dim=0, keepdim=True)*10.0).round().clamp(0, 255).byte()" + } + ] + } +} + + +#ENCODE_IMG_TEMPLATE: { + _bounds: _ + _high_resolution: [number, number, number] + _layer_name: _ + _z_offset: int + _use_perlin_field: *false | true + _model: { + path: _ + res_change_mult: [int, int, int] + } + + let max_chunk_size = [ + list.Min([#MAX_TASK_SIZE[0], math.Ceil((_bounds[0][1] - _bounds[0][0]) / #DST_INFO_CHUNK_SIZE[0] / dst_resolution[0]) * #DST_INFO_CHUNK_SIZE[0]]), + list.Min([#MAX_TASK_SIZE[1], math.Ceil((_bounds[1][1] - _bounds[1][0]) / #DST_INFO_CHUNK_SIZE[1] / dst_resolution[1]) * #DST_INFO_CHUNK_SIZE[1]]), + 1 + ] + + _src_img_path: _ + _dst_enc_path: _ + if _use_perlin_field == true { + _src_img_path: #DST_WARPED_SRC_IMG_PATH + _layer_name + "/bad_alignment/z\(_z_offset)" + _dst_enc_path: #DST_WARPED_SRC_ENC_PATH + _layer_name + "/bad_alignment/z\(_z_offset)" + } + if _use_perlin_field == false { + _src_img_path: #DST_WARPED_SRC_IMG_PATH + _layer_name + "/good_alignment/z\(_z_offset)" + _dst_enc_path: #DST_WARPED_SRC_ENC_PATH + _layer_name + "/good_alignment/z\(_z_offset)" + } + + "@type": "build_subchunkable_apply_flow" + op: { + "@type": "VolumetricCallableOperation" + operation_name: _layer_name + fn: { + if _model.res_change_mult[0] == 1 { + "@type": "BaseEncoder" + } + if _model.res_change_mult[1] > 1 { + "@type": "BaseCoarsener" + tile_pad_in: op.crop_pad[0] + tile_size: 1024 + ds_factor: _model.res_change_mult[0] + } + model_path: _model.path + } + crop_pad: [16, 16, 0] + res_change_mult: _model.res_change_mult + } + dst_resolution: [ for j in [0, 1, 2] {_high_resolution[j] * _model.res_change_mult[j]} ] + processing_chunk_sizes: [max_chunk_size, [1024, 1024, 1]] + processing_crop_pads: [[0, 0, 0], [16,16,0]] + expand_bbox_resolution: true + skip_intermediaries: true + bbox: { + "@type": "BBox3D.from_coords", + start_coord: [_bounds[0][0], _bounds[1][0], _bounds[2][0]] + end_coord: [_bounds[0][1], _bounds[1][1], _bounds[2][1]] + } + op_kwargs: { + src: { + "@type": "build_cv_layer" + path: _src_img_path + } + } + dst: { + "@type": "build_cv_layer" + path: _dst_enc_path + info_reference_path: #TGT_ENC_PATH + _layer_name + "/tgt_enc_2023" + } +} + + +#COMPUTE_FIELD_STAGE: { + "@type": "mazepa.execute_on_gcp_with_sqs" + worker_image: "us.gcr.io/zetta-research/zetta_utils:nico_py3.9_20231118" + worker_resources: { + "nvidia.com/gpu": "1" + } + worker_replicas: 300 + batch_gap_sleep_sec: 0.1 + do_dryrun_estimation: true + local_test: false + worker_cluster_project: "zetta-research" + worker_cluster_region: "us-east1" + worker_cluster_name: "zutils-x3" + target: { + "@type": "mazepa.concurrent_flow" + stages: [ + for key, dataset in #DATASETS { + "@type": "mazepa.concurrent_flow" + stages: [ + #COMPUTE_FIELD_TEMPLATE & { + _bounds: dataset.bounds, + _dst_resolution: dataset.resolution + _layer_name: key, + _z_offset: 1 + }, + #COMPUTE_FIELD_TEMPLATE & { + _bounds: dataset.bounds, + _dst_resolution: dataset.resolution + _layer_name: key, + _z_offset: 1 + _use_perlin_field: true + }, + if dataset.contiguous { + #COMPUTE_FIELD_TEMPLATE & { + _bounds: dataset.bounds, + _dst_resolution: dataset.resolution + _layer_name: key, + _z_offset: 2 + } + }, + if dataset.contiguous { + #COMPUTE_FIELD_TEMPLATE & { + _bounds: dataset.bounds, + _dst_resolution: dataset.resolution + _layer_name: key, + _z_offset: 2 + _use_perlin_field: true + }, + } + ] + } + ] + } +} + + +#WARP_IMAGE_STAGE: { + "@type": "mazepa.execute_on_gcp_with_sqs" + worker_image: "us.gcr.io/zetta-research/zetta_utils:nico_py3.9_20231118" + worker_resources: { + "memory": "8Gi" + } + worker_replicas: 100 + batch_gap_sleep_sec: 0.1 + do_dryrun_estimation: true + local_test: false + worker_cluster_project: "zetta-research" + worker_cluster_region: "us-east1" + worker_cluster_name: "zutils-x3" + target: { + "@type": "mazepa.concurrent_flow" + stages: [ + for key, dataset in #DATASETS { + "@type": "mazepa.concurrent_flow" + stages: [ + #WARP_IMG_TEMPLATE & { + _bounds: dataset.bounds, + dst_resolution: dataset.resolution + _layer_name: key, + _z_offset: 1 + }, + #WARP_IMG_TEMPLATE & { + _bounds: dataset.bounds, + dst_resolution: dataset.resolution + _layer_name: key, + _z_offset: 1 + _use_perlin_field: true + }, + if dataset.contiguous { + #WARP_IMG_TEMPLATE & { + _bounds: dataset.bounds, + dst_resolution: dataset.resolution + _layer_name: key, + _z_offset: 2 + } + } + if dataset.contiguous { + #WARP_IMG_TEMPLATE & { + _bounds: dataset.bounds, + dst_resolution: dataset.resolution + _layer_name: key, + _z_offset: 2 + _use_perlin_field: true + }, + } + ] + } + ] + } +} + +#DOWNSAMPLE_FIELD_STAGE: { + "@type": "mazepa.execute_on_gcp_with_sqs" + worker_image: "us.gcr.io/zetta-research/zetta_utils:nico_py3.9_20231118" + worker_resources: { + "memory": "8Gi" + } + worker_replicas: 100 + batch_gap_sleep_sec: 0.1 + do_dryrun_estimation: true + local_test: false + worker_cluster_project: "zetta-research" + worker_cluster_region: "us-east1" + worker_cluster_name: "zutils-x3" + target: { + "@type": "mazepa.concurrent_flow" + stages: [ + for key, dataset in #DATASETS { + "@type": "mazepa.concurrent_flow" + stages: [ + #DOWNSAMPLE_FIELD_TEMPLATE & { + _bounds: dataset.bounds, + src_resolution: dataset.resolution + _layer_name: key, + _z_offset: 1 + }, + #DOWNSAMPLE_FIELD_TEMPLATE & { + _bounds: dataset.bounds, + src_resolution: dataset.resolution + _layer_name: key, + _z_offset: 1 + _use_perlin_field: true + }, + if dataset.contiguous { + #DOWNSAMPLE_FIELD_TEMPLATE & { + _bounds: dataset.bounds, + src_resolution: dataset.resolution + _layer_name: key, + _z_offset: 2 + } + } + if dataset.contiguous { + #DOWNSAMPLE_FIELD_TEMPLATE & { + _bounds: dataset.bounds, + src_resolution: dataset.resolution + _layer_name: key, + _z_offset: 2 + _use_perlin_field: true + } + } + ] + } + ] + } +} + +#EXTRACT_DISPLACEMENT_STAGE: { + "@type": "mazepa.execute_on_gcp_with_sqs" + worker_image: "us.gcr.io/zetta-research/zetta_utils:nico_py3.9_20231118" + worker_resources: { + "memory": "8Gi" + } + worker_replicas: 100 + batch_gap_sleep_sec: 0.1 + do_dryrun_estimation: true + local_test: false + worker_cluster_project: "zetta-research" + worker_cluster_region: "us-east1" + worker_cluster_name: "zutils-x3" + target: { + "@type": "mazepa.concurrent_flow" + stages: [ + for key, dataset in #DATASETS { + "@type": "mazepa.concurrent_flow" + stages: [ + #FIELD_DIFF_TEMPLATE & { + _bounds: dataset.bounds, + dst_resolution: dataset.resolution + _layer_name: key, + _z_offset: 1 + }, + #FIELD_DIFF_TEMPLATE & { + _bounds: dataset.bounds, + dst_resolution: [dataset.resolution[0] * 2, dataset.resolution[1] * 2, dataset.resolution[2]] + _layer_name: key, + _z_offset: 1 + }, + if dataset.contiguous { + #FIELD_DIFF_TEMPLATE & { + _bounds: dataset.bounds, + dst_resolution: dataset.resolution + _layer_name: key, + _z_offset: 2 + } + } + if dataset.contiguous { + #FIELD_DIFF_TEMPLATE & { + _bounds: dataset.bounds, + dst_resolution: [dataset.resolution[0] * 2, dataset.resolution[1] * 2, dataset.resolution[2]] + _layer_name: key, + _z_offset: 2 + } + } + ] + } + ] + } +} + + +#ENCODE_IMAGE_STAGE: { + "@type": "mazepa.execute_on_gcp_with_sqs" + worker_image: "us.gcr.io/zetta-research/zetta_utils:nico_py3.9_20231118" + worker_resources: { + "nvidia.com/gpu": "1" + } + worker_replicas: 300 + batch_gap_sleep_sec: 0.1 + do_dryrun_estimation: true + local_test: false + worker_cluster_project: "zetta-research" + worker_cluster_region: "us-east1" + worker_cluster_name: "zutils-x3" + target: { + "@type": "mazepa.concurrent_flow" + stages: [ + for key, dataset in #DATASETS { + "@type": "mazepa.concurrent_flow" + stages: [ + for i in list.Range(0, 2, 1) { + "@type": "mazepa.concurrent_flow" + stages: [ + #ENCODE_IMG_TEMPLATE & { + _bounds: dataset.bounds, + _high_resolution: dataset.resolution + _layer_name: key, + _z_offset: 1 + _model: #MODELS[i] + }, + #ENCODE_IMG_TEMPLATE & { + _bounds: dataset.bounds, + _high_resolution: dataset.resolution + _layer_name: key, + _z_offset: 1 + _use_perlin_field: true + _model: #MODELS[i] + }, + if dataset.contiguous { + #ENCODE_IMG_TEMPLATE & { + _bounds: dataset.bounds, + _high_resolution: dataset.resolution + _layer_name: key, + _z_offset: 2 + _model: #MODELS[i] + } + } + if dataset.contiguous { + #ENCODE_IMG_TEMPLATE & { + _bounds: dataset.bounds, + _high_resolution: dataset.resolution + _layer_name: key, + _z_offset: 2 + _use_perlin_field: true + _model: #MODELS[i] + }, + } + ] + } + ] + } + ] + } +} + + +[ + #COMPUTE_FIELD_STAGE, + #WARP_IMAGE_STAGE, + #DOWNSAMPLE_FIELD_STAGE, + #EXTRACT_DISPLACEMENT_STAGE, + #ENCODE_IMAGE_STAGE, +] \ No newline at end of file diff --git a/specs/nico/training/aced_misd/train/z1z2_enc_misd.cue b/specs/nico/training/aced_misd/train/z1z2_enc_misd.cue new file mode 100644 index 000000000..4a9a9f17c --- /dev/null +++ b/specs/nico/training/aced_misd/train/z1z2_enc_misd.cue @@ -0,0 +1,351 @@ +import "strings" +import "strconv" +import "list" + +#EXP_NAME: "aced_misd_general" +#TRAINING_ROOT: "gs://zetta-research-nico/training_artifacts" +#LR: 2e-4 +#K: 3 +#CHUNK_XY: 1024 +#FM: 32 + +#FIELD_MAGN_THR: 5.0 +#Z_OFFSETS: [1, 2] +#DS_FACTOR: 1 + + +#EXP_VERSION: "1.0.0_dsfactor\(#DS_FACTOR)_thr\(#FIELD_MAGN_THR)_lr\(#LR)_z" + strings.Join([for z in #Z_OFFSETS {strconv.FormatInt(z, 10)}], "_") +#MODEL_CKPT: null // "gs://zetta-research-nico/training_artifacts/aced_misd_cns/thr5.0_lr0.00005_z1z2_400-500_2910-2920_more_aligned_unet5_32/last.ckpt" + +#BASE_PATH: "gs://zetta-research-nico/encoder/" +#SRC_ENC_PATH: #BASE_PATH + "misd/enc/" // + k + ["/good_alignment"|"/bad_alignment"] + "/z\(_z_offset)" +#TGT_ENC_PATH: #BASE_PATH + "pairwise_aligned/" // + k + "/tgt_enc_2023" +#DISP_PATH: #BASE_PATH + "misd/misalignment_fields/" // + k + "/displacements/z\(_z_offset)" + +#VAL_DATASETS: { + "microns_basil": {"resolution": [32, 32, 40], "num_samples": 2591}, +} + +#TRAIN_DATASETS: { + "microns_pinky": {"resolution": [32, 32, 40], "num_samples": 5019}, + // "microns_basil": {"resolution": [32, 32, 40], "num_samples": 2591}, + "kim_n2da": {"resolution": [32, 32, 50], "num_samples": 446}, + "kim_pfc2022": {"resolution": [32, 32, 40], "num_samples": 3699}, + "kronauer_cra9": {"resolution": [32, 32, 42], "num_samples": 740}, + "kubota_001": {"resolution": [40, 40, 40], "num_samples": 4744}, + "lee_ppc": {"resolution": [32, 32, 40], "num_samples": 7219}, + "prieto_godino_larva": {"resolution": [32, 32, 32], "num_samples": 4584}, + "janelia_hemibrain": {"resolution": [32, 32, 32], "num_samples": 5304}, + "nguyen_thomas_2022": {"resolution": [32, 32, 40], "num_samples": 1847}, + "mulcahy_2022_16h": {"resolution": [32, 32, 30], "num_samples": 3379}, + "wildenberg_2021_vta_dat12a": {"resolution": [32, 32, 40], "num_samples": 1704}, + "bumbarber_2013": {"resolution": [31.2, 31.2, 50.0], "num_samples": 7325}, + "wilson_2019_p3": {"resolution": [32, 32, 30], "num_samples": 2092}, + "ishibashi_2021_em1": {"resolution": [32, 32, 32], "num_samples": 141}, + "ishibashi_2021_em2": {"resolution": [32, 32, 32], "num_samples": 166}, + "templier_2019_wafer1": {"resolution": [32, 32, 50], "num_samples": 5401}, + "templier_2019_wafer3": {"resolution": [32, 32, 50], "num_samples": 3577}, + "lichtman_octopus2022": {"resolution": [32, 32, 30], "num_samples": 5673}, +} + +#UNET_DOWNSAMPLE: { + "@type": "torch.nn.MaxPool2d" + "@mode": "partial" + kernel_size: 2 +} + +#UNET_UPSAMPLE: { + { + "@type": "UpConv" + "@mode": "partial" + kernel_size: #K + upsampler: { + "@type": "torch.nn.Upsample" + "@mode": "partial" + scale_factor: 2 + mode: "nearest" + align_corners: null + }, + conv: { + "@type": "torch.nn.Conv2d" + "@mode": "partial" + padding: 1 + } + } +} + +#TARGET: { + "@type": "lightning_train" + regime: { + "@type": "MisalignmentDetectorAcedRegime" + output_mode: "binary" + encoder_path: null + max_shared_displacement_px: 0.0 + max_src_displacement_px: { + "@type": "uniform_distr" + low: 0.0 + high: 0.0 + } + equivar_rot_deg_distr: { + "@type": "uniform_distr" + low: 0.0 + high: 0.0 + } + equivar_trans_px_distr: { + "@type": "uniform_distr" + low: 0.0 + high: 0.0 + } + + field_magn_thr: #FIELD_MAGN_THR + val_log_row_interval: 4 + train_log_row_interval: 200 + lr: #LR + model: { + "@type": "load_weights_file" + model: { + "@type": "torch.nn.Sequential" + modules: [ + { + "@type": "ConvBlock", + "@version": "0.0.2" + num_channels: [2, #FM], + kernel_sizes: [5, 5], + activate_last: true, + }, + { + "@type": "UNet" + "@version": "0.0.2" + list_num_channels: [ + [#FM, #FM, #FM], + [#FM, #FM, #FM], + [#FM, #FM, #FM], + [#FM, #FM, #FM], + [#FM, #FM, #FM], + + [#FM, #FM, #FM], + + [#FM, #FM, #FM], + [#FM, #FM, #FM], + [#FM, #FM, #FM], + [#FM, #FM, #FM], + [#FM, #FM, #FM], + ] + downsample: #UNET_DOWNSAMPLE + upsample: #UNET_UPSAMPLE + activate_last: true + kernel_sizes: [#K, #K] + padding_modes: "zeros" + unet_skip_mode: "sum" + skips: {"0": 2} + }, + { + "@type": "torch.nn.Conv2d" + in_channels: #FM + out_channels: 1 + kernel_size: 1 + }, + // { + // "@type": "torch.nn.Sigmoid" # Regime applies binary_cross_entropy_with_logits + // } + ] + }, + ckpt_path: #MODEL_CKPT + component_names: [ + "model", + ] + } + } + trainer: { + "@type": "ZettaDefaultTrainer" + accelerator: "gpu" + precision: "16-mixed", + devices: 1 + max_epochs: 100 + default_root_dir: #TRAINING_ROOT + experiment_name: #EXP_NAME + experiment_version: #EXP_VERSION + log_every_n_steps: 10 + val_check_interval: 500 + num_sanity_val_steps: -1 + reload_dataloaders_every_n_epochs: 1, + checkpointing_kwargs: { + update_every_n_secs: 1700 + backup_every_n_secs: 3700 + } + } + + train_dataloader: { + "@type": "TorchDataLoader" + batch_size: 8 + //shuffle: true + sampler: { + "@type": "SamplerWrapper", + sampler: { + "@type": "TorchRandomSampler" + data_source: { + "@type": "torch.arange" + "end": list.Sum([for dataset in #TRAIN_DATASETS {dataset.num_samples}]) + }, + replacement: false, + num_samples: 4000, + }, + }, + num_workers: 19 + dataset: #TRAINING + pin_memory: true + } + val_dataloader: { + "@type": "TorchDataLoader" + batch_size: 4 + shuffle: false + num_workers: 19 + dataset: #VALIDATION + pin_memory: true + } +} + + +#ENC_PROCS: [ + { + "@mode": "partial" + "@type": "rearrange" + "pattern": "c x y 1 -> c x y" + }, + { + "@type": "divide" + "@mode": "partial" + value: 127.0 + }, +] + +#DISP_PROCS: [ + { + "@mode": "partial" + "@type": "rearrange" + "pattern": "c x y 1 -> c x y" + }, + { + "@type": "divide" + "@mode": "partial" + value: 10.0 + }, +] + + +#TRAINING: { + "@type": "JointDataset" + mode: "horizontal" + datasets: { + images: { + "@type": "JointDataset" + mode: "vertical" + datasets: { + for key, dataset in #TRAIN_DATASETS { + for z_offset in #Z_OFFSETS { + "\(key)_z\(z_offset)": { + "@type": "LayerDataset" + layer: { + "@type": "build_layer_set" + layers: { + src: { + "@type": "build_cv_layer" + path: #SRC_ENC_PATH + key + "/bad_alignment/z\(z_offset)" + read_procs: #ENC_PROCS + } + tgt: { + "@type": "build_cv_layer" + path: #TGT_ENC_PATH + key + "/tgt_enc_2023" + read_procs: #ENC_PROCS + } + displacement: { + "@type": "build_cv_layer" + path: #DISP_PATH + key + "/displacements/z\(z_offset)" + read_procs: #DISP_PROCS + } + } + } + sample_indexer: { + "@type": "RandomIndexer", + inner_indexer: { + "@type": "VolumetricNGLIndexer", + resolution: [dataset.resolution[0] * #DS_FACTOR, dataset.resolution[1] * #DS_FACTOR, dataset.resolution[2]], + chunk_size: [#CHUNK_XY, #CHUNK_XY, 1], + path: "zetta-research-nico/encoder/pairwise_aligned/" + key, + } + } + }, + } + } + } + } + } +} + + +#VALIDATION: { + "@type": "JointDataset" + mode: "horizontal" + datasets: { + images: { + "@type": "JointDataset" + mode: "vertical" + datasets: { + for key, dataset in #VAL_DATASETS { + for z_offset in #Z_OFFSETS { + "\(key)_z\(z_offset)": { + "@type": "LayerDataset" + layer: { + "@type": "build_layer_set" + layers: { + src: { + "@type": "build_cv_layer" + path: #SRC_ENC_PATH + key + "/bad_alignment/z\(z_offset)" + read_procs: #ENC_PROCS + } + tgt: { + "@type": "build_cv_layer" + path: #TGT_ENC_PATH + key + "/tgt_enc_2023" + read_procs: #ENC_PROCS + } + displacement: { + "@type": "build_cv_layer" + path: #DISP_PATH + key + "/displacements/z\(z_offset)" + read_procs: #DISP_PROCS + } + } + } + sample_indexer: { + "@type": "LoopIndexer", + desired_num_samples: 100 + inner_indexer: { + "@type": "VolumetricNGLIndexer", + resolution: [dataset.resolution[0] * #DS_FACTOR, dataset.resolution[1] * #DS_FACTOR, dataset.resolution[2]], + chunk_size: [#CHUNK_XY, #CHUNK_XY, 1], + path: "zetta-research-nico/encoder/pairwise_aligned/" + key, + } + } + }, + } + } + } + } + } +} + + + +// "@type": "lightning_train_remote" +// "@mode": "partial" +// worker_cluster_name: "zutils-x3" +// worker_cluster_region: "us-east1" +// worker_cluster_project: "zetta-research" +// worker_image: "us.gcr.io/zetta-research/zetta_utils:nico_py3.9_20231113" +// worker_resources: {"nvidia.com/gpu": "4"}, +// worker_resource_requests: {"memory": "27560Mi", "cpu": 28}, +// num_nodes: 1 +// spec_path: #TARGET +// follow_logs: true +// env_vars: {"LOGLEVEL": "INFO", "NCCL_SOCKET_IFNAME": "eth0"}, + +[#TARGET] \ No newline at end of file diff --git a/specs/nico/training/aced_misd_cns/z1z2_enc_misd.cue b/specs/nico/training/aced_misd_cns/z1z2_enc_misd.cue deleted file mode 100644 index 6a60887c8..000000000 --- a/specs/nico/training/aced_misd_cns/z1z2_enc_misd.cue +++ /dev/null @@ -1,757 +0,0 @@ -#EXP_NAME: "aced_misd_cns" -#TRAINING_ROOT: "gs://zetta-research-nico/training_artifacts" -#LR: 1e-5 -#CLIP: 0e-5 -#K: 3 -#CHUNK_XY: 1024 -#FIELD_MAGN_THR: 5.0 - - -#EXP_VERSION: "thr\(#FIELD_MAGN_THR)_lr\(#LR)_z1z2_400-500_2910-2920_more_aligned_unet5_32_finetune_2" -#MODEL_CKPT: "gs://zetta-research-nico/training_artifacts/aced_misd_cns/thr5.0_lr0.00005_z1z2_400-500_2910-2920_more_aligned_unet5_32/last.ckpt" - -// #TGT_CV: "gs://zetta-research-nico/pairs_dsets/cns_x0_400-500/encs_warped/0" -// #SRC_Z2_PREFIX: "gs://zetta-research-nico/misd/enc/local_optima_400-500/enc_z2/med_7.5px_max_" -// #DISP_Z2_PREFIX: "gs://zetta-research-nico/misd/cns/local_optima_400-500/vec_length10x_z2/med_7.5px_max_" -// #MAX_DISP: 20 - -"@type": "mazepa.execute_on_gcp_with_sqs" -worker_image: "us.gcr.io/zetta-research/zetta_utils:nico_py3.9_20230405" -worker_resources: { - memory: "38560Mi" - "nvidia.com/gpu": "1" -} -worker_replicas: 1 - -local_test: false - -#UNET_DOWNSAMPLE: { - "@type": "torch.nn.MaxPool2d" - "@mode": "partial" - kernel_size: 2 -} - -#UNET_UPSAMPLE: { - { - "@type": "UpConv" - "@mode": "partial" - kernel_size: #K - upsampler: { - "@type": "torch.nn.Upsample" - "@mode": "partial" - scale_factor: 2 - mode: "nearest" - align_corners: null - }, - conv: { - "@type": "torch.nn.Conv2d" - "@mode": "partial" - padding: "same" - } - } -} - -target: { - "@type": "lightning_train" - "@mode": "partial" - - regime: { - "@type": "MisalignmentDetectorAcedRegime" - output_mode: "binary" - encoder_path: null - max_shared_displacement_px: 0.0 - max_src_displacement_px: { - "@type": "uniform_distr" - low: 0.0 - high: 0.0 - } - equivar_rot_deg_distr: { - "@type": "uniform_distr" - low: 0.0 - high: 0.0 - } - equivar_trans_px_distr: { - "@type": "uniform_distr" - low: 0.0 - high: 0.0 - } - - field_magn_thr: #FIELD_MAGN_THR - val_log_row_interval: 4 - train_log_row_interval: 200 - lr: #LR - model: { - "@type": "load_weights_file" - model: { - "@type": "torch.nn.Sequential" - modules: [ - { - "@type": "UNet" - "@version": "0.0.2" - list_num_channels: [ - [2, 32, 32], - [32, 32, 32], - [32, 32, 32], - [32, 32, 32], - [32, 32, 32], - - [32, 32, 32], - - [32, 32, 32], - [32, 32, 32], - [32, 32, 32], - [32, 32, 32], - [32, 32, 32], - ] - downsample: #UNET_DOWNSAMPLE - upsample: #UNET_UPSAMPLE - activate_last: true - kernel_sizes: [#K, #K] - padding_modes: "zeros" - unet_skip_mode: "sum" - skips: {"1": 2} - }, - { - "@type": "torch.nn.Conv2d" - in_channels: 32 - out_channels: 1 - kernel_size: 1 - }, - { - "@type": "torch.nn.Sigmoid" - } - ] - }, - ckpt_path: #MODEL_CKPT - component_names: [ - "model", - ] - } - } - trainer: { - "@type": "ZettaDefaultTrainer" - accelerator: "gpu" - devices: 1 - max_epochs: 100 - default_root_dir: #TRAINING_ROOT - experiment_name: #EXP_NAME - experiment_version: #EXP_VERSION - log_every_n_steps: 10 - val_check_interval: 1000 - gradient_clip_algorithm: "norm" - gradient_clip_val: #CLIP - checkpointing_kwargs: { - update_every_n_secs: 60 - backup_every_n_secs: 900 - } - } - - train_dataloader: { - "@type": "TorchDataLoader" - batch_size: 8 - shuffle: true - num_workers: 12 - dataset: #TRAINING_DSET - } - val_dataloader: { - "@type": "TorchDataLoader" - batch_size: 4 - shuffle: false - num_workers: 8 - dataset: #VAL_DSET - } -} - - -#IMG_PROCS: [ - { - "@mode": "partial" - "@type": "rearrange" - "pattern": "c x y 1 -> c x y" - }, - { - "@type": "divide" - "@mode": "partial" - value: 127.0 - }, -] - -#DISP_PROCS: [ - { - "@mode": "partial" - "@type": "rearrange" - "pattern": "c x y 1 -> c x y" - }, - { - "@type": "divide" - "@mode": "partial" - value: 10.0 - }, -] - - -#TRAINING_DSET: { - "@type": "JointDataset" - mode: "horizontal" - datasets: { - images: { - "@type": "JointDataset" - mode: "vertical" - datasets: { - for z_offset in [1, 2] { - "z400_500_\(z_offset)": { - "@type": "LayerDataset" - layer: { - "@type": "build_layer_set" - layers: { - src: { - "@type": "build_cv_layer" - path: "gs://zetta-research-nico/misd/cns/pairwise_enc_400-500/fine_misaligned/-\(z_offset)" - read_procs: #IMG_PROCS - } - tgt: { - "@type": "build_cv_layer" - path: "gs://zetta_lee_fly_cns_001_alignment_temp/aced/coarse_x0/encodings_masked" - read_procs: #IMG_PROCS - index_procs: [ - { - "@type": "VolumetricIndexTranslator" - offset: [0, 0, -z_offset] - resolution: [32, 32, 45] - } - ] - } - displacement: { - "@type": "build_cv_layer" - path: "gs://zetta-research-nico/misd/cns/pairwise_fields_400-500/fine_diff3/-\(z_offset)" - read_procs: #DISP_PROCS - } - } - } - sample_indexer: { - "@type": "RandomIndexer" - inner_indexer: { - "@type": "ChainIndexer" - inner_indexer: [ - { - "@type": "VolumetricStridedIndexer" - resolution: [32, 32, 45] - stride: [#CHUNK_XY, #CHUNK_XY, 1] - chunk_size: [#CHUNK_XY, #CHUNK_XY, 1] - bbox: { - "@type": "BBox3D.from_coords" - start_coord: [1 * 2048, 1 * 2048, 400] - end_coord: [4 * 2048, 4 * 2048, 498] - resolution: [32, 32, 45] - } - }, - { - "@type": "VolumetricStridedIndexer" - resolution: [32, 32, 45] - stride: [#CHUNK_XY, #CHUNK_XY, 1] - chunk_size: [#CHUNK_XY, #CHUNK_XY, 1] - bbox: { - "@type": "BBox3D.from_coords" - start_coord: [6 * 2048, 1 * 2048, 400] - end_coord: [9 * 2048, 4 * 2048, 498] - resolution: [32, 32, 45] - } - }, - { - "@type": "VolumetricStridedIndexer" - resolution: [32, 32, 45] - stride: [#CHUNK_XY, #CHUNK_XY, 1] - chunk_size: [#CHUNK_XY, #CHUNK_XY, 1] - bbox: { - "@type": "BBox3D.from_coords" - start_coord: [11 * 2048, 1 * 2048, 400] - end_coord: [15 * 2048, 4 * 2048, 498] - resolution: [32, 32, 45] - } - } - ] - } - } - }, - "z400_500_\(z_offset)_aligned": { - "@type": "LayerDataset" - layer: { - "@type": "build_layer_set" - layers: { - src: { - "@type": "build_cv_layer" - path: "gs://zetta-research-nico/misd/cns/pairwise_enc_400-500/fine/-\(z_offset)" - read_procs: #IMG_PROCS - } - tgt: { - "@type": "build_cv_layer" - path: "gs://zetta_lee_fly_cns_001_alignment_temp/aced/coarse_x0/encodings_masked" - read_procs: #IMG_PROCS - index_procs: [ - { - "@type": "VolumetricIndexTranslator" - offset: [0, 0, -z_offset] - resolution: [32, 32, 45] - } - ] - } - displacement: { - "@type": "build_cv_layer" - path: "file:///tmp/placeholder_400-500" - cv_kwargs: { - fill_missing: true - } - info_reference_path: "gs://zetta-research-nico/misd/cns/pairwise_fields_400-500/fine_diff3/-\(z_offset)" - read_procs: [ - { - "@mode": "partial" - "@type": "rearrange" - "pattern": "c x y 1 -> c x y" - }, - { - "@type": "torch.zeros_like" - "@mode": "partial" - }, - { - "@type": "torch.add" - "@mode": "partial" - other: 0.0 - } - ] - } - } - } - sample_indexer: { - "@type": "RandomIndexer" - inner_indexer: { - "@type": "ChainIndexer" - inner_indexer: [ - { - "@type": "VolumetricStridedIndexer" - resolution: [32, 32, 45] - stride: [#CHUNK_XY, #CHUNK_XY, 1] - chunk_size: [#CHUNK_XY, #CHUNK_XY, 1] - bbox: { - "@type": "BBox3D.from_coords" - start_coord: [1 * 2048, 1 * 2048, 400] - end_coord: [4 * 2048, 4 * 2048, 498] - resolution: [32, 32, 45] - } - }, - { - "@type": "VolumetricStridedIndexer" - resolution: [32, 32, 45] - stride: [#CHUNK_XY, #CHUNK_XY, 1] - chunk_size: [#CHUNK_XY, #CHUNK_XY, 1] - bbox: { - "@type": "BBox3D.from_coords" - start_coord: [6 * 2048, 1 * 2048, 400] - end_coord: [9 * 2048, 4 * 2048, 498] - resolution: [32, 32, 45] - } - }, - { - "@type": "VolumetricStridedIndexer" - resolution: [32, 32, 45] - stride: [#CHUNK_XY, #CHUNK_XY, 1] - chunk_size: [#CHUNK_XY, #CHUNK_XY, 1] - bbox: { - "@type": "BBox3D.from_coords" - start_coord: [11 * 2048, 1 * 2048, 400] - end_coord: [15 * 2048, 4 * 2048, 498] - resolution: [32, 32, 45] - } - } - ] - } - } - }, - "z2910_2920_\(z_offset)": { - "@type": "LayerDataset" - layer: { - "@type": "build_layer_set" - layers: { - src: { - "@type": "build_cv_layer" - path: "gs://zetta-research-nico/misd/cns/pairwise_enc_2908-2921/fine_misaligned/-\(z_offset)" - read_procs: #IMG_PROCS - } - tgt: { - "@type": "build_cv_layer" - path: "gs://zetta-research-nico/pairs_dsets/cns_x0_2910-2920_masked" - read_procs: #IMG_PROCS - index_procs: [ - { - "@type": "VolumetricIndexTranslator" - offset: [0, 0, -z_offset] - resolution: [32, 32, 45] - } - ] - } - displacement: { - "@type": "build_cv_layer" - path: "gs://zetta-research-nico/misd/cns/pairwise_fields_2908-2921/fine_diff3/-\(z_offset)" - read_procs: #DISP_PROCS - } - } - } - sample_indexer: { - "@type": "RandomIndexer" - inner_indexer: { - "@type": "ChainIndexer" - inner_indexer: [ - { - "@type": "VolumetricStridedIndexer" - resolution: [32, 32, 45] - stride: [#CHUNK_XY, #CHUNK_XY, 1] - chunk_size: [#CHUNK_XY, #CHUNK_XY, 1] - bbox: { - "@type": "BBox3D.from_coords" - start_coord: [3 * 1024, 2 * 1024, 2910] - end_coord: [27 * 1024, 8 * 1024, 2921] - resolution: [32, 32, 45] - } - }, - { - "@type": "VolumetricStridedIndexer" - resolution: [32, 32, 45] - stride: [#CHUNK_XY, #CHUNK_XY, 1] - chunk_size: [#CHUNK_XY, #CHUNK_XY, 1] - bbox: { - "@type": "BBox3D.from_coords" - start_coord: [13 * 1024, 8 * 1024, 2910] - end_coord: [16 * 1024, 16 * 1024, 2921] - resolution: [32, 32, 45] - } - }, - { - "@type": "VolumetricStridedIndexer" - resolution: [32, 32, 45] - stride: [#CHUNK_XY, #CHUNK_XY, 1] - chunk_size: [#CHUNK_XY, #CHUNK_XY, 1] - bbox: { - "@type": "BBox3D.from_coords" - start_coord: [12 * 1024, 16 * 1024, 2910] - end_coord: [21 * 1024, 20 * 1024, 2921] - resolution: [32, 32, 45] - } - }, - { - "@type": "VolumetricStridedIndexer" - resolution: [32, 32, 45] - stride: [#CHUNK_XY, #CHUNK_XY, 1] - chunk_size: [#CHUNK_XY, #CHUNK_XY, 1] - bbox: { - "@type": "BBox3D.from_coords" - start_coord: [12 * 1024, 21 * 1024, 2910] - end_coord: [17 * 1024, 25 * 1024, 2921] - resolution: [32, 32, 45] - } - } - ] - } - } - }, - "z2910_2920_\(z_offset)_aligned": { - "@type": "LayerDataset" - layer: { - "@type": "build_layer_set" - layers: { - src: { - "@type": "build_cv_layer" - path: "gs://zetta-research-nico/misd/cns/pairwise_enc_2908-2921/fine/-\(z_offset)" - read_procs: #IMG_PROCS - } - tgt: { - "@type": "build_cv_layer" - path: "gs://zetta-research-nico/pairs_dsets/cns_x0_2910-2920_masked" - read_procs: #IMG_PROCS - index_procs: [ - { - "@type": "VolumetricIndexTranslator" - offset: [0, 0, -z_offset] - resolution: [32, 32, 45] - } - ] - } - displacement: { - "@type": "build_cv_layer" - path: "file:///tmp/placeholder_2908-2921" - cv_kwargs: { - fill_missing: true - } - info_reference_path: "gs://zetta-research-nico/misd/cns/pairwise_fields_2908-2921/fine_diff3/-\(z_offset)" - read_procs: [ - { - "@mode": "partial" - "@type": "rearrange" - "pattern": "c x y 1 -> c x y" - }, - { - "@type": "torch.zeros_like" - "@mode": "partial" - }, - { - "@type": "torch.add" - "@mode": "partial" - other: 0.0 - } - ] - } - } - } - sample_indexer: { - "@type": "RandomIndexer" - inner_indexer: { - "@type": "ChainIndexer" - inner_indexer: [ - { - "@type": "VolumetricStridedIndexer" - resolution: [32, 32, 45] - stride: [#CHUNK_XY, #CHUNK_XY, 1] - chunk_size: [#CHUNK_XY, #CHUNK_XY, 1] - bbox: { - "@type": "BBox3D.from_coords" - start_coord: [3 * 1024, 2 * 1024, 2910] - end_coord: [27 * 1024, 8 * 1024, 2921] - resolution: [32, 32, 45] - } - }, - { - "@type": "VolumetricStridedIndexer" - resolution: [32, 32, 45] - stride: [#CHUNK_XY, #CHUNK_XY, 1] - chunk_size: [#CHUNK_XY, #CHUNK_XY, 1] - bbox: { - "@type": "BBox3D.from_coords" - start_coord: [13 * 1024, 8 * 1024, 2910] - end_coord: [16 * 1024, 16 * 1024, 2921] - resolution: [32, 32, 45] - } - }, - { - "@type": "VolumetricStridedIndexer" - resolution: [32, 32, 45] - stride: [#CHUNK_XY, #CHUNK_XY, 1] - chunk_size: [#CHUNK_XY, #CHUNK_XY, 1] - bbox: { - "@type": "BBox3D.from_coords" - start_coord: [12 * 1024, 16 * 1024, 2910] - end_coord: [21 * 1024, 20 * 1024, 2921] - resolution: [32, 32, 45] - } - }, - { - "@type": "VolumetricStridedIndexer" - resolution: [32, 32, 45] - stride: [#CHUNK_XY, #CHUNK_XY, 1] - chunk_size: [#CHUNK_XY, #CHUNK_XY, 1] - bbox: { - "@type": "BBox3D.from_coords" - start_coord: [12 * 1024, 21 * 1024, 2910] - end_coord: [17 * 1024, 25 * 1024, 2921] - resolution: [32, 32, 45] - } - } - ] - } - } - }, - "false_neg_z\(z_offset)": { - "@type": "LayerDataset" - layer: { - "@type": "build_layer_set" - layers: { - src: { - "@type": "build_cv_layer" - path: "gs://zetta-research-nico/misd/cns/pairwise_enc_3406-3410/fine/-\(z_offset)" - read_procs: #IMG_PROCS - } - tgt: { - "@type": "build_cv_layer" - path: "gs://zetta-research-nico/pairs_dsets/cns_x0_3406-3410_masked" - read_procs: #IMG_PROCS - index_procs: [ - { - "@type": "VolumetricIndexTranslator" - offset: [0, 0, -z_offset] - resolution: [32, 32, 45] - } - ] - } - displacement: { - "@type": "build_cv_layer" - path: "file:///tmp/placeholder_3406-3410" - cv_kwargs: { - fill_missing: true - } - info_reference_path: "gs://zetta-research-nico/misd/cns/pairwise_fields_2908-2921/fine_diff3/-\(z_offset)" - read_procs: [ - { - "@mode": "partial" - "@type": "rearrange" - "pattern": "c x y 1 -> c x y" - }, - { - "@type": "torch.full_like" - "@mode": "partial" - fill_value: 255.0 - }, - { - "@type": "torch.add" - "@mode": "partial" - other: 0.0 - } - ] - } - } - } - sample_indexer: { - "@type": "RandomIndexer" - inner_indexer: { - "@type": "LoopIndexer" - if z_offset == 1 { - desired_num_samples: 12500 - } - if z_offset == 2 { - desired_num_samples: 8000 - } - inner_indexer: { - "@type": "VolumetricNGLIndexer" - resolution: [32, 32, 45] - chunk_size: [1024, 1024, 1] - path: "nkem/cns/false_neg_z\(z_offset)" - } - } - } - } - }, - } - } - } -} - - -#VAL_DSET: { - "@type": "JointDataset" - mode: "horizontal" - datasets: { - images: { - "@type": "JointDataset" - mode: "vertical" - datasets: { - for z_offset in [2] { - "z2000_2001_\(z_offset)": { - "@type": "LayerDataset" - layer: { - "@type": "build_layer_set" - layers: { - src: { - "@type": "build_cv_layer" - path: "gs://zetta-research-nico/misd/cns/pairwise_enc_1998-2001/fine_misaligned/-\(z_offset)" - read_procs: #IMG_PROCS - } - tgt: { - "@type": "build_cv_layer" - path: "gs://zetta-research-nico/pairs_dsets/cns_x0_1998-2001_masked" - read_procs: #IMG_PROCS - index_procs: [ - { - "@type": "VolumetricIndexTranslator" - offset: [0, 0, -z_offset] - resolution: [32, 32, 45] - } - ] - } - displacement: { - "@type": "build_cv_layer" - path: "gs://zetta-research-nico/misd/cns/pairwise_fields_1998-2001/fine_diff3/-\(z_offset)" - read_procs: #DISP_PROCS - } - } - } - sample_indexer: { - "@type": "RandomIndexer" - inner_indexer: { - "@type": "VolumetricStridedIndexer" - resolution: [32, 32, 45] - stride: [#CHUNK_XY, #CHUNK_XY, 1] - chunk_size: [#CHUNK_XY, #CHUNK_XY, 1] - bbox: { - "@type": "BBox3D.from_coords" - start_coord: [3 * 1024, 3 * 1024, 2000] - end_coord: [14 * 1024, 7 * 1024, 2001] - resolution: [32, 32, 45] - } - }, - } - }, - "z2000_2001_\(z_offset)_aligned": { - "@type": "LayerDataset" - layer: { - "@type": "build_layer_set" - layers: { - src: { - "@type": "build_cv_layer" - path: "gs://zetta-research-nico/misd/cns/pairwise_enc_1998-2001/fine/-\(z_offset)" - read_procs: #IMG_PROCS - } - tgt: { - "@type": "build_cv_layer" - path: "gs://zetta-research-nico/pairs_dsets/cns_x0_1998-2001_masked" - read_procs: #IMG_PROCS - index_procs: [ - { - "@type": "VolumetricIndexTranslator" - offset: [0, 0, -z_offset] - resolution: [32, 32, 45] - } - ] - } - displacement: { - "@type": "build_cv_layer" - path: "file:///tmp/placeholder_1998-2001" - cv_kwargs: { - fill_missing: true - } - info_reference_path: "gs://zetta-research-nico/misd/cns/pairwise_fields_1998-2001/fine_diff3/-\(z_offset)" - read_procs: [ - { - "@mode": "partial" - "@type": "rearrange" - "pattern": "c x y 1 -> c x y" - }, - { - "@type": "torch.zeros_like" - "@mode": "partial" - }, - { - "@type": "torch.add" - "@mode": "partial" - other: 0.0 - } - ] - } - } - } - sample_indexer: { - "@type": "RandomIndexer" - inner_indexer: { - "@type": "VolumetricStridedIndexer" - resolution: [32, 32, 45] - stride: [#CHUNK_XY, #CHUNK_XY, 1] - chunk_size: [#CHUNK_XY, #CHUNK_XY, 1] - bbox: { - "@type": "BBox3D.from_coords" - start_coord: [3 * 1024, 3 * 1024, 2000] - end_coord: [14 * 1024, 7 * 1024, 2001] - resolution: [32, 32, 45] - } - }, - } - }, - } - } - } - } -} diff --git a/zetta_utils/training/lightning/regimes/alignment/misalignment_detector_aced.py b/zetta_utils/training/lightning/regimes/alignment/misalignment_detector_aced.py index 756ce0ee8..f374426d2 100644 --- a/zetta_utils/training/lightning/regimes/alignment/misalignment_detector_aced.py +++ b/zetta_utils/training/lightning/regimes/alignment/misalignment_detector_aced.py @@ -1,4 +1,5 @@ # pylint: disable=too-many-locals +import os from typing import Literal, Optional import attrs @@ -107,15 +108,11 @@ def validation_epoch_start(self, _): # pylint: disable=no-self-use seed_everything(42) def on_validation_epoch_end(self): - self.log_results( - "val", - "worst", - **self.worst_val_sample, - ) - self.worst_val_loss = 0 - self.worst_val_sample = {} - self.worst_val_sample_idx = None - seed_everything(None) + env_seed = os.environ.get("PL_GLOBAL_SEED") + if env_seed is not None: + seed_everything(int(env_seed) + self.current_epoch) + else: + seed_everything(None) def _get_warped(self, img, field=None): img_padded = torch.nn.functional.pad(img, (1, 1, 1, 1), value=self.zero_value) @@ -236,7 +233,7 @@ def compute_misd_loss(self, batch: dict, mode: str, log_row: bool, sample_name: weight = torch.ones_like(gt_labels, dtype=torch.float32) weight[intersect_tissue == 0] = 0.0 - loss_map = torch.nn.functional.binary_cross_entropy( + loss_map = torch.nn.functional.binary_cross_entropy_with_logits( prediction, gt_labels.float(), weight=weight, reduction="none" )