From 0a502a8b1adf4a90a0d3c73cde25e6c9101935c9 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 12 Sep 2024 08:45:42 +0100
Subject: [PATCH 1/6] Add bbox centroid fix

---
 movement/io/load_bboxes.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/movement/io/load_bboxes.py b/movement/io/load_bboxes.py
index 6971de0f..c009f37b 100644
--- a/movement/io/load_bboxes.py
+++ b/movement/io/load_bboxes.py
@@ -402,6 +402,11 @@ def _numpy_arrays_from_via_tracks_file(file_path: Path) -> dict:
 
         array_dict[key] = np.stack(list_arrays, axis=1).squeeze()
 
+    # Transform position_array to represent centroid of bbox,
+    # rather than top-left corner
+    # (top-left corner: corner of the bbox with minimum x and y coordinates)
+    array_dict["position_array"] += array_dict["shape_array"] / 2
+
     # Add remaining arrays to dict
     array_dict["ID_array"] = df["ID"].unique().reshape(-1, 1)
     array_dict["frame_array"] = df["frame_number"].unique().reshape(-1, 1)
@@ -415,14 +420,16 @@ def _df_from_via_tracks_file(file_path: Path) -> pd.DataFrame:
     Read the VIA tracks .csv file as a pandas dataframe with columns:
     - ID: the integer ID of the tracked bounding box.
     - frame_number: the frame number of the tracked bounding box.
-    - x: the x-coordinate of the tracked bounding box centroid.
-    - y: the y-coordinate of the tracked bounding box centroid.
+    - x: the x-coordinate of the tracked bounding box top-left corner.
+    - y: the y-coordinate of the tracked bounding box top-left corner.
     - w: the width of the tracked bounding box.
     - h: the height of the tracked bounding box.
     - confidence: the confidence score of the tracked bounding box.
 
     The dataframe is sorted by ID and frame number, and for each ID,
-    empty frames are filled in with NaNs.
+    empty frames are filled in with NaNs. The coordinates of the bboxes
+    are assumed to be in the image coordinate system, with the origin at the
+    top-left corner of the image.
     """
     # Read VIA tracks .csv file as a pandas dataframe
     df_file = pd.read_csv(file_path, sep=",", header=0)

From e110d4536b0c15590a2cd07acab440e1886e4a98 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 12 Sep 2024 10:05:06 +0100
Subject: [PATCH 2/6] Add tests

---
 tests/test_unit/test_load_bboxes.py | 51 +++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/tests/test_unit/test_load_bboxes.py b/tests/test_unit/test_load_bboxes.py
index dbed362c..29c13110 100644
--- a/tests/test_unit/test_load_bboxes.py
+++ b/tests/test_unit/test_load_bboxes.py
@@ -419,3 +419,54 @@ def test_fps_and_time_coords(
     else:
         start_frame = 0
     assert_time_coordinates(ds, expected_fps, start_frame)
+
+
+def test_df_from_via_tracks_file(via_tracks_file):
+    """Test that the helper function correctly reads the VIA tracks .csv file
+    as a dataframe.
+    """
+    df = load_bboxes._df_from_via_tracks_file(via_tracks_file)
+
+    assert isinstance(df, pd.DataFrame)
+
+    # Check data is for 5 frames
+    assert len(df.frame_number.unique()) == 5
+
+    # Check all individuals are defined for every frame (even if Nan)
+    assert df.shape[0] == len(df.ID.unique()) * 5
+
+    # Check columns
+    assert list(df.columns) == [
+        "ID",
+        "frame_number",
+        "x",
+        "y",
+        "w",
+        "h",
+        "confidence",
+    ]
+
+
+def test_position_numpy_array_from_via_tracks_file(via_tracks_file):
+    """Test the extracted position array from the VIA tracks .csv file
+    represents the centroid of the bbox.
+    """
+    # Extract arrays from VIA tracks .csv file
+    bboxes_arrays = load_bboxes._numpy_arrays_from_via_tracks_file(
+        via_tracks_file
+    )
+
+    # Read VIA tracks .csv file as a dataframe
+    df = load_bboxes._df_from_via_tracks_file(via_tracks_file)
+
+    # Check the centroid values
+    for k, id in enumerate(bboxes_arrays["ID_array"]):
+        df_one_ID = df[df["ID"] == id.item()]
+        centroid_position = np.array(
+            [df_one_ID.x + df_one_ID.w / 2, df_one_ID.y + df_one_ID.h / 2]
+        ).T
+
+        assert np.allclose(
+            bboxes_arrays["position_array"][:, k, :],
+            centroid_position,
+        )

From 3d189f65311b1023f3098d4e4c18705b2d9f452f Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 12 Sep 2024 10:13:41 +0100
Subject: [PATCH 3/6] Remove loop from assert

---
 tests/test_unit/test_load_bboxes.py | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/tests/test_unit/test_load_bboxes.py b/tests/test_unit/test_load_bboxes.py
index 29c13110..fc84f9ae 100644
--- a/tests/test_unit/test_load_bboxes.py
+++ b/tests/test_unit/test_load_bboxes.py
@@ -459,14 +459,18 @@ def test_position_numpy_array_from_via_tracks_file(via_tracks_file):
     # Read VIA tracks .csv file as a dataframe
     df = load_bboxes._df_from_via_tracks_file(via_tracks_file)
 
-    # Check the centroid values
-    for k, id in enumerate(bboxes_arrays["ID_array"]):
+    # Compute centroid positions from the dataframe
+    # (go thru in the same order as ID array)
+    list_derived_centroids = []
+    for id in bboxes_arrays["ID_array"]:
         df_one_ID = df[df["ID"] == id.item()]
         centroid_position = np.array(
             [df_one_ID.x + df_one_ID.w / 2, df_one_ID.y + df_one_ID.h / 2]
-        ).T
+        ).T  # frames, xy
+        list_derived_centroids.append(centroid_position)
 
-        assert np.allclose(
-            bboxes_arrays["position_array"][:, k, :],
-            centroid_position,
-        )
+    # Compare to extracted position array
+    assert np.allclose(
+        bboxes_arrays["position_array"],  # frames, individuals, xy
+        np.stack(list_derived_centroids, axis=1),
+    )

From 0c814f5fbe99200b0138446d2130014c93fcbe9b Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 12 Sep 2024 10:15:35 +0100
Subject: [PATCH 4/6] Clarify docstring

---
 movement/io/load_bboxes.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/movement/io/load_bboxes.py b/movement/io/load_bboxes.py
index c009f37b..8550a2e8 100644
--- a/movement/io/load_bboxes.py
+++ b/movement/io/load_bboxes.py
@@ -420,16 +420,16 @@ def _df_from_via_tracks_file(file_path: Path) -> pd.DataFrame:
     Read the VIA tracks .csv file as a pandas dataframe with columns:
     - ID: the integer ID of the tracked bounding box.
     - frame_number: the frame number of the tracked bounding box.
-    - x: the x-coordinate of the tracked bounding box top-left corner.
-    - y: the y-coordinate of the tracked bounding box top-left corner.
+    - x: the x-coordinate of the tracked bounding box's top-left corner.
+    - y: the y-coordinate of the tracked bounding box's top-left corner.
     - w: the width of the tracked bounding box.
     - h: the height of the tracked bounding box.
     - confidence: the confidence score of the tracked bounding box.
 
     The dataframe is sorted by ID and frame number, and for each ID,
     empty frames are filled in with NaNs. The coordinates of the bboxes
-    are assumed to be in the image coordinate system, with the origin at the
-    top-left corner of the image.
+    are assumed to be in the image coordinate system (i.e., the top-left
+    corner of a bbox is its corner with minimum x and y coordinates).
     """
     # Read VIA tracks .csv file as a pandas dataframe
     df_file = pd.read_csv(file_path, sep=",", header=0)

From 126550ef1ce524524836b5082af15378462c0dd8 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 12 Sep 2024 10:23:46 +0100
Subject: [PATCH 5/6] Remove spaces

---
 tests/test_unit/test_load_bboxes.py | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/tests/test_unit/test_load_bboxes.py b/tests/test_unit/test_load_bboxes.py
index fc84f9ae..e12a514c 100644
--- a/tests/test_unit/test_load_bboxes.py
+++ b/tests/test_unit/test_load_bboxes.py
@@ -428,14 +428,10 @@ def test_df_from_via_tracks_file(via_tracks_file):
     df = load_bboxes._df_from_via_tracks_file(via_tracks_file)
 
     assert isinstance(df, pd.DataFrame)
-
-    # Check data is for 5 frames
     assert len(df.frame_number.unique()) == 5
-
-    # Check all individuals are defined for every frame (even if Nan)
-    assert df.shape[0] == len(df.ID.unique()) * 5
-
-    # Check columns
+    assert (
+        df.shape[0] == len(df.ID.unique()) * 5
+    )  # all individuals in all frames (even if NaN)
     assert list(df.columns) == [
         "ID",
         "frame_number",
@@ -451,7 +447,7 @@ def test_position_numpy_array_from_via_tracks_file(via_tracks_file):
     """Test the extracted position array from the VIA tracks .csv file
     represents the centroid of the bbox.
     """
-    # Extract arrays from VIA tracks .csv file
+    # Extract numpy arrays from VIA tracks .csv file
     bboxes_arrays = load_bboxes._numpy_arrays_from_via_tracks_file(
         via_tracks_file
     )

From 8bdf2710e0e286d29b71aab9c5097f73913aaced Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 12 Sep 2024 21:24:57 +0100
Subject: [PATCH 6/6] Update tests/test_unit/test_load_bboxes.py

Co-authored-by: Niko Sirmpilatze <niko.sirbiladze@gmail.com>
---
 tests/test_unit/test_load_bboxes.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_unit/test_load_bboxes.py b/tests/test_unit/test_load_bboxes.py
index e12a514c..474e6118 100644
--- a/tests/test_unit/test_load_bboxes.py
+++ b/tests/test_unit/test_load_bboxes.py
@@ -459,9 +459,9 @@ def test_position_numpy_array_from_via_tracks_file(via_tracks_file):
     # (go thru in the same order as ID array)
     list_derived_centroids = []
     for id in bboxes_arrays["ID_array"]:
-        df_one_ID = df[df["ID"] == id.item()]
+        df_one_id = df[df["ID"] == id.item()]
         centroid_position = np.array(
-            [df_one_ID.x + df_one_ID.w / 2, df_one_ID.y + df_one_ID.h / 2]
+            [df_one_id.x + df_one_id.w / 2, df_one_id.y + df_one_id.h / 2]
         ).T  # frames, xy
         list_derived_centroids.append(centroid_position)