From 14ac7d12b3c0072e2e0c19e3b83e76389088135e Mon Sep 17 00:00:00 2001
From: dlbrittain
Date: Thu, 23 May 2024 15:37:32 -0700
Subject: [PATCH] fix: get tests working, fix requirements

---
 .../workflows/ingest_new_annotations.py |   3 +-
 .../workflows/spatial_lookup.py         |   2 +-
 requirements.in                         |   2 +-
 requirements.txt                        |   4 +-
 tests/test_spaital_lookup.py            | 223 +++++++++++++-----
 5 files changed, 169 insertions(+), 65 deletions(-)

diff --git a/materializationengine/workflows/ingest_new_annotations.py b/materializationengine/workflows/ingest_new_annotations.py
index db9907c5..bf2eabff 100644
--- a/materializationengine/workflows/ingest_new_annotations.py
+++ b/materializationengine/workflows/ingest_new_annotations.py
@@ -951,8 +951,7 @@ def get_new_root_ids(materialization_data: dict, mat_metadata: dict) -> dict:
     drop_col_names = list(
         supervoxel_df.loc[:, supervoxel_df.columns.str.endswith("position")]
     )
-    supervoxel_df = supervoxel_df.drop(drop_col_names, 1)
-
+    supervoxel_df = supervoxel_df.drop(labels=drop_col_names, axis=1)
 
     AnnotationModel = create_annotation_model(mat_metadata, with_crud_columns=True)
     SegmentationModel = create_segmentation_model(mat_metadata)
diff --git a/materializationengine/workflows/spatial_lookup.py b/materializationengine/workflows/spatial_lookup.py
index e3c86913..340b874d 100644
--- a/materializationengine/workflows/spatial_lookup.py
+++ b/materializationengine/workflows/spatial_lookup.py
@@ -535,7 +535,7 @@ def calc_min_enclosing_and_sub_volumes(
                     [outside_max[0], global_bbox[0, 1] - 1, outside_max[2]],
                 ]
             )
-        )["chunk_size"]
+        )
     if outside_max[1] > global_bbox[1, 1]:
         sub_volumes.append(
             np.array(
diff --git a/requirements.in b/requirements.in
index 3b026f0b..738403ad 100644
--- a/requirements.in
+++ b/requirements.in
@@ -5,7 +5,7 @@ pillow>=8.3.2
 psutil>=5.6.6
 cloud-files>=4.6.1
 pandas
-flask==2.0.2
+flask==2.3.3
 SQLAlchemy<1.4
 Flask-SQLAlchemy
 jsonschema
diff --git a/requirements.txt b/requirements.txt
index 8398b372..9e040575 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -27,6 +27,8 @@ attrs==23.2.0
     #   jsonschema
 billiard==4.2.0
     # via celery
+blinker==1.8.2
+    # via flask
 blosc2==2.6.2
     # via tables
 boto3==1.34.112
@@ -125,7 +127,7 @@ fastremap==1.14.1
     # via
     #   cloud-volume
     #   crackle-codec
-flask==2.0.2
+flask==2.3.3
     # via
     #   -r requirements.in
     #   flask-admin
diff --git a/tests/test_spaital_lookup.py b/tests/test_spaital_lookup.py
index db169ab8..3dbaffb3 100644
--- a/tests/test_spaital_lookup.py
+++ b/tests/test_spaital_lookup.py
@@ -1,5 +1,10 @@
 import numpy as np
 import pandas as pd
+import pytest
+from materializationengine.workflows.spatial_lookup import (
+    calc_min_enclosing_and_sub_volumes,
+)
+
 
 def create_dataframes(seg_columns, data):
     segmentation_dataframe = pd.DataFrame(columns=seg_columns, dtype=object)
     data_df = pd.DataFrame(data).astype(object)
     print(f"Data dataframe: {data_df}")
     return segmentation_dataframe, data_df
 
+
 def merge_dataframes(segmentation_dataframe, data_df):
     common_cols = segmentation_dataframe.columns.intersection(data_df.columns)
     print(f"Common columns: {common_cols}")
-    df = pd.merge(segmentation_dataframe[common_cols], data_df[common_cols], how="right")
+    df = pd.merge(
+        segmentation_dataframe[common_cols], data_df[common_cols], how="right"
+    )
     df = df.infer_objects().fillna(0)
     df = df.reindex(columns=segmentation_dataframe.columns, fill_value=0)
     print(f"Merged dataframe: {df}")
     return df
 
-def test_dataframe_merging_basic():
-    seg_columns = ["id", "column1", "column2"]
-    data = {
-        "id": [1, 2, 3],
"column1": [10, 20, 30], - "column3": [100, 200, 300], - } - segmentation_dataframe, data_df = create_dataframes(seg_columns, data) - df = merge_dataframes(segmentation_dataframe, data_df) - assert df["id"].tolist() == [1, 2, 3] - assert df["column1"].tolist() == [10, 20, 30] - assert df["column2"].tolist() == [0, 0, 0] - -def test_dataframe_merging_fewer_columns(): - seg_columns = ["id", "column1", "column2", "column3"] - data = { - "id": [1, 2, 3], - "column1": [10, 20, 30], - } - segmentation_dataframe, data_df = create_dataframes(seg_columns, data) - df = merge_dataframes(segmentation_dataframe, data_df) - assert df["id"].tolist() == [1, 2, 3] - assert df["column1"].tolist() == [10, 20, 30] - assert df["column2"].tolist() == [0, 0, 0] - assert df["column3"].tolist() == [0, 0, 0] - -def test_dataframe_merging_uint64(): - seg_columns = ["id", "column1", "column2"] - data = { - "id": [1, 2, 3], - "column1": [np.uint64(2**63), np.uint64(2**63 + 1), np.uint64(2**63 + 2)], - "column2": [np.uint64(2**64 - 3), np.uint64(2**64 - 2), np.uint64(2**64 - 1)], - } - segmentation_dataframe, data_df = create_dataframes(seg_columns, data) - df = merge_dataframes(segmentation_dataframe, data_df) - assert df["id"].tolist() == [1, 2, 3] - assert df["column1"].tolist() == [np.uint64(2**63), np.uint64(2**63 + 1), np.uint64(2**63 + 2)] - assert df["column2"].tolist() == [np.uint64(2**64 - 3), np.uint64(2**64 - 2), np.uint64(2**64 - 1)] - -def test_dataframe_merging_int64(): - seg_columns = ["id", "column1", "column2"] - data = { - "id": [1, 2, 3], - "column1": [np.int64(2**62), np.int64(2**62 + 1), np.int64(2**62 + 2)], - "column2": [np.int64(-2**63), np.int64(-2**63 + 1), np.int64(-2**63 + 2)], - } - segmentation_dataframe, data_df = create_dataframes(seg_columns, data) - df = merge_dataframes(segmentation_dataframe, data_df) - assert df["id"].tolist() == [1, 2, 3] - assert df["column1"].tolist() == [np.int64(2**62), np.int64(2**62 + 1), np.int64(2**62 + 2)] - assert df["column2"].tolist() == [np.int64(-2**63), np.int64(-2**63 + 1), np.int64(-2**63 + 2)] - -if __name__ == "__main__": - test_dataframe_merging_basic() - test_dataframe_merging_fewer_columns() - test_dataframe_merging_uint64() - test_dataframe_merging_int64() - print("All tests passed!") - \ No newline at end of file + +class TestSpatialLookup: + + def test_dataframe_merging_basic(self): + seg_columns = ["id", "column1", "column2"] + data = { + "id": [1, 2, 3], + "column1": [10, 20, 30], + "column3": [100, 200, 300], + } + segmentation_dataframe, data_df = create_dataframes(seg_columns, data) + df = merge_dataframes(segmentation_dataframe, data_df) + assert df["id"].tolist() == [1, 2, 3] + assert df["column1"].tolist() == [10, 20, 30] + assert df["column2"].tolist() == [0, 0, 0] + + + def test_dataframe_merging_fewer_columns(self): + seg_columns = ["id", "column1", "column2", "column3"] + data = { + "id": [1, 2, 3], + "column1": [10, 20, 30], + } + segmentation_dataframe, data_df = create_dataframes(seg_columns, data) + df = merge_dataframes(segmentation_dataframe, data_df) + assert df["id"].tolist() == [1, 2, 3] + assert df["column1"].tolist() == [10, 20, 30] + assert df["column2"].tolist() == [0, 0, 0] + assert df["column3"].tolist() == [0, 0, 0] + + + def test_dataframe_merging_uint64(self): + seg_columns = ["id", "column1", "column2"] + data = { + "id": [1, 2, 3], + "column1": [np.uint64(2**63), np.uint64(2**63 + 1), np.uint64(2**63 + 2)], + "column2": [np.uint64(2**64 - 3), np.uint64(2**64 - 2), np.uint64(2**64 - 1)], + } + 
+        segmentation_dataframe, data_df = create_dataframes(seg_columns, data)
+        df = merge_dataframes(segmentation_dataframe, data_df)
+        assert df["id"].tolist() == [1, 2, 3]
+        assert df["column1"].tolist() == [
+            np.uint64(2**63),
+            np.uint64(2**63 + 1),
+            np.uint64(2**63 + 2),
+        ]
+        assert df["column2"].tolist() == [
+            np.uint64(2**64 - 3),
+            np.uint64(2**64 - 2),
+            np.uint64(2**64 - 1),
+        ]
+
+
+    def test_dataframe_merging_int64(self):
+        seg_columns = ["id", "column1", "column2"]
+        data = {
+            "id": [1, 2, 3],
+            "column1": [np.int64(2**62), np.int64(2**62 + 1), np.int64(2**62 + 2)],
+            "column2": [np.int64(-(2**63)), np.int64(-(2**63) + 1), np.int64(-(2**63) + 2)],
+        }
+        segmentation_dataframe, data_df = create_dataframes(seg_columns, data)
+        df = merge_dataframes(segmentation_dataframe, data_df)
+        assert df["id"].tolist() == [1, 2, 3]
+        assert df["column1"].tolist() == [
+            np.int64(2**62),
+            np.int64(2**62 + 1),
+            np.int64(2**62 + 2),
+        ]
+        assert df["column2"].tolist() == [
+            np.int64(-(2**63)),
+            np.int64(-(2**63) + 1),
+            np.int64(-(2**63) + 2),
+        ]
+
+
+    # Test cases for calc_min_enclosing_and_sub_volumes
+
+    @pytest.mark.parametrize(
+        "test_id, input_bboxes, global_bbox, chunk_size, cv_resolution, coord_resolution, expected_enclosing_bbox, expected_sub_volumes",
+        [
+            # Happy path: a single bbox fully inside global_bbox
+            (
+                "HP-1",
+                [[np.array([10, 10, 10]), np.array([20, 20, 20])]],
+                [np.array([0, 0, 0]), np.array([30, 30, 30])],
+                [5, 5, 5],
+                [1, 1, 1],
+                [1, 1, 1],
+                np.array([[10, 10, 10], [20, 20, 20]]),
+                [],
+            ),
+            # Happy path: multiple bboxes, some partially outside global_bbox
+            (
+                "HP-2",
+                [
+                    [np.array([10, 10, 10]), np.array([40, 40, 40])],
+                    [np.array([-10, -10, -10]), np.array([5, 5, 5])],
+                ],
+                [np.array([0, 0, 0]), np.array([30, 30, 30])],
+                [10, 10, 10],
+                [1, 1, 1],
+                [1, 1, 1],
+                np.array([[0, 0, 0], [30, 30, 30]]),
+                [
+                    np.array([[31, 0, 0], [40, 40, 40]]),
+                    np.array([[0, 31, 0], [40, 40, 40]]),
+                    np.array([[0, 0, 31], [40, 40, 40]]),
+                    np.array([[-10, -10, -10], [-1, 30, 30]]),
+                    np.array([[-10, -10, -10], [30, -1, 30]]),
+                    np.array([[-10, -10, -10], [30, 30, -1]]),
+                ],
+            ),
+            # Edge case: no bboxes provided
+            (
+                "EC-1",
+                [],
+                [np.array([0, 0, 0]), np.array([30, 30, 30])],
+                [10, 10, 10],
+                [1, 1, 1],
+                [1, 1, 1],
+                None,
+                [],
+            ),
+        ],
+    )
+    def test_calc_min_enclosing_and_sub_volumes(
+        self,
+        test_id,
+        input_bboxes,
+        global_bbox,
+        chunk_size,
+        cv_resolution,
+        coord_resolution,
+        expected_enclosing_bbox,
+        expected_sub_volumes,
+    ):
+        # Arrange: all inputs come from pytest's parametrize, so no further
+        # setup is needed.
+
+        # Act
+        result_enclosing_bbox, result_sub_volumes = calc_min_enclosing_and_sub_volumes(
+            input_bboxes, global_bbox, chunk_size, cv_resolution, coord_resolution
+        )
+
+        # Assert
+        if expected_enclosing_bbox is None:
+            assert result_enclosing_bbox is None
+        else:
+            np.testing.assert_array_equal(result_enclosing_bbox, expected_enclosing_bbox)
+
+        assert len(result_sub_volumes) == len(expected_sub_volumes)
+        for result_sub, expected_sub in zip(result_sub_volumes, expected_sub_volumes):
+            np.testing.assert_array_equal(result_sub, expected_sub)
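
Note (outside the patch itself): two behaviors above are worth calling out.
First, pandas 2.x removed positional axis arguments, so the old call
supervoxel_df.drop(drop_col_names, 1) raises a TypeError there, while the
keyword form drop(labels=..., axis=1) used in the patch works on pandas 1.x
and 2.x alike. Second, the merge_dataframes pattern zero-fills schema columns
that are missing from the incoming data while preserving full-range uint64
values. Below is a minimal sketch of that merge behavior, mirroring the
helpers' logic; the column names and values are illustrative, not taken from
a real annotation table.

import numpy as np
import pandas as pd

# Schema frame: object dtype keeps large integers from being coerced to float.
seg_columns = ["id", "column1", "column2"]
segmentation_dataframe = pd.DataFrame(columns=seg_columns, dtype=object)

# Incoming data: one schema column missing ("column2"), one extra ("column3").
data_df = pd.DataFrame(
    {"id": [1, 2, 3], "column1": [np.uint64(2**64 - 1)] * 3, "column3": [7, 8, 9]}
).astype(object)

# Merge only on the columns the two frames share; "column3" is excluded here.
common_cols = segmentation_dataframe.columns.intersection(data_df.columns)
df = pd.merge(segmentation_dataframe[common_cols], data_df[common_cols], how="right")
df = df.infer_objects().fillna(0)
# Schema columns absent from the data (here "column2") come back zero-filled.
df = df.reindex(columns=segmentation_dataframe.columns, fill_value=0)

assert df["column1"].tolist() == [np.uint64(2**64 - 1)] * 3  # uint64 preserved
assert df["column2"].tolist() == [0, 0, 0]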