From 8c3997c9c3d57d524b18d1a46c9a0c3f638ea87a Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Tue, 10 Sep 2024 11:10:37 +0100
Subject: [PATCH 01/25] exploring predictions vs ground-truth

---
 examples/load_and_explore_bboxes.py | 233 ++++++++++++++++++++++++++++
 1 file changed, 233 insertions(+)
 create mode 100644 examples/load_and_explore_bboxes.py

diff --git a/examples/load_and_explore_bboxes.py b/examples/load_and_explore_bboxes.py
new file mode 100644
index 00000000..000dc561
--- /dev/null
+++ b/examples/load_and_explore_bboxes.py
@@ -0,0 +1,233 @@
+"""Inspect crab trajectories"""
+
+# %%
+import ast
+import itertools
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+from cycler import cycler
+
+from movement.io import load_bboxes
+
+# %matplotlib widget
+
+# %%%%%%%%%%%%%%%%%%%%%
+# input data
+file_csv = (
+    "/Users/sofia/arc/project_Zoo_crabs/escape_clips/"
+    "crabs_track_output_selected_clips/04.09.2023-04-Right_RE_test/predicted_tracks.csv"
+)
+
+
+# load ground truth!
+groundtruth_csv = (
+    "/Users/sofia/arc/project_Zoo_crabs/escape_clips/"
+    "04.09.2023-04-Right_RE_test_corrected_ST_csv_SM.csv"
+)
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%
+# Fix ground truth file
+df = pd.read_csv(groundtruth_csv, sep=",", header=0)
+
+# find duplicates
+list_unique_filenames = list(set(df.filename))
+filenames_to_rep_ID = {}
+for file in list_unique_filenames:
+    df_one_filename = df.loc[df["filename"] == file]
+
+    list_track_ids_one_filename = [
+        int(ast.literal_eval(row.region_attributes)["track"])
+        for row in df_one_filename.itertuples()
+    ]
+
+    if len(set(list_track_ids_one_filename)) != len(
+        list_track_ids_one_filename
+    ):
+        # [
+        #     list_track_ids_one_filename.remove(k)
+        #     for k in set(list_track_ids_one_filename)
+        # ]  # there could be more than one duplicate!!!
+        for k in set(list_track_ids_one_filename):
+            list_track_ids_one_filename.remove(k)  # remove first occurrence
+
+        filenames_to_rep_ID[file] = list_track_ids_one_filename
+
+# delete duplicate rows
+for file, list_rep_ID in filenames_to_rep_ID.items():
+    for rep_ID in list_rep_ID:
+        # find repeated rows for selected file and rep_ID
+        matching_rows = df[
+            (df["filename"] == file)
+            & (df["region_attributes"] == f'{{"track":"{rep_ID}"}}')
+        ]
+
+        # Identify the index of the first matching row
+        if not matching_rows.empty:
+            indices_to_drop = matching_rows.index[1:]
+
+            # Drop all but the first matching row
+            df = df.drop(indices_to_drop)
+
+# save to csv
+groundtruth_csv_corrected = Path(groundtruth_csv).parent / Path(
+    Path(groundtruth_csv).stem + "_corrected.csv"
+)
+df.to_csv(groundtruth_csv_corrected, index=False)
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Read corrected ground truth as movement dataset
+ds_gt = load_bboxes.from_via_tracks_file(
+    groundtruth_csv_corrected, fps=None, use_frame_numbers_from_file=False
+)
+print(ds_gt)
+
+# Print summary
+print(f"{ds_gt.source_file}")
+print(f"Number of frames: {ds_gt.sizes['time']}")
+print(f"Number of individuals: {ds_gt.sizes['individuals']}")
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Read predictions as movement dataset
+ds_pred = load_bboxes.from_via_tracks_file(
+    file_csv, fps=None, use_frame_numbers_from_file=False
+)
+print(ds_pred)
+
+# Print summary
+print(f"{ds_pred.source_file}")
+print(f"Number of frames: {ds_pred.sizes['time']}")
+print(f"Number of individuals: {ds_pred.sizes['individuals']}")
+
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Check when individuals are labelled
+# check x and y coordinates are nan at the same locations
+# TODO: change colormap to white and blue
+assert (
+    np.isnan(ds_gt.position.data[:, :, 0])
+    == np.isnan(ds_gt.position.data[:, :, 1])
+).all()
+
+fig, axs = plt.subplots(2, 1, sharex=True)
+
+axs[0].matshow(np.isnan(ds_gt.position.data[:, :, 0]).T, aspect="auto")
+axs[0].set_title("Ground truth")
+axs[0].set_xlabel("time (frames)")
+axs[0].set_ylabel("individual")
+
+axs[1].matshow(np.isnan(ds_pred.position.data[:, :, 0]).T, aspect="auto")
+axs[1].set_title("Prediction")
+axs[1].set_xlabel("time (frames)")
+axs[1].set_ylabel("tracks")
+axs[1].xaxis.tick_bottom()
+
+# # add reference
+# axs[1].hlines(
+#     y=ds_gt.sizes["individuals"],
+#     xmin=0,
+#     xmax=ds_gt.sizes["time"] - 1,
+#     color="red",
+# )
+
+fig.subplots_adjust(hspace=0.6, wspace=0.5)
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Compare lengths of continuous tracks & plot distrib
+
+# for each individual, find the length of chunks between nans
+map_individuals_to_chunks = {}
+for individual in range(ds_gt.sizes["individuals"]):
+    # find nans in x-coord for that individual
+    nan_idx = np.isnan(ds_gt.position.data[:, individual, 0])
+
+    # find lengths of continuous tracks
+    len_chunks = [
+        len(list(group_iter))
+        for key, group_iter in itertools.groupby(nan_idx)
+        if not key
+    ]
+
+    map_individuals_to_chunks[individual] = len_chunks
+
+# %%
+fig, ax = plt.subplots(1, 1)
+for ind, list_chunks in map_individuals_to_chunks.items():
+    ax.scatter([ind] * len(list_chunks), list_chunks)
+
+
+# [sum(1 for _ in input) for _, input in itertools.groupby(_)]
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Check confidence of detections
+confidence_values = ds_pred.confidence.data.flatten()
+nan_median_confidence = np.nanmedian(confidence_values)
+
+
+fig, ax = plt.subplots(1, 1)
+hist = ax.hist(confidence_values, bins=np.arange(0, 1.01, 0.05))
+ax.vlines(x=nan_median_confidence, ymin=0, ymax=max(hist[0]), color="red")
+ax.set_aspect("auto")
+
+fig, ax = plt.subplots(1, 1)
+ax.hist(ds_pred.confidence.data.flatten(), bins=np.arange(0.6, 1.01, 0.01))
+ax.vlines(x=nan_median_confidence, ymin=0, ymax=max(hist[0]), color="red")
+ax.set_aspect("auto")
+
+print(f"Median confidence: {nan_median_confidence}")
+
+# %%
+# plot all trajectories
+# ds.position ---> time, individuals, space
+# why noise? remove low predictions?
+
+for ds, title in zip(
+    [ds_gt, ds_pred], ["Ground truth", "Prediction"], strict=False
+):
+    # cmap = plt.get_cmap('tab10')
+    fig, ax = plt.subplots(1, 1)
+    plt.rcParams["axes.prop_cycle"] = cycler(
+        color=plt.get_cmap("tab10").colors
+    )
+
+    for ind_idx in range(ds.sizes["individuals"]):
+        ax.scatter(
+            x=ds.position[:, ind_idx, 0],  # nframes, nindividuals, x
+            y=ds.position[:, ind_idx, 1],
+            s=1,
+            # c=cmap(ind_idx),
+        )
+    ax.set_aspect("equal")
+    ax.set_ylim(-150, 2500)
+    ax.set_xlabel("x (pixels)")
+    ax.set_ylabel("y (pixels)")
+    ax.set_title(title)
+    plt.show()
+
+# %%
+# first 10 individuals
+fig, ax = plt.subplots(1, 1)
+
+ax.scatter(x=ds_pred.position[:, :10, 0], y=ds_pred.position[:, :10, 1], s=1)
+ax.set_aspect("equal")
+ax.set_xlabel("x (pixels)")
+ax.set_ylabel("y (pixels)")
+# %%
+# groupby
+# It generates a break or new group every time the value of the key function
+# changes
+# input = (
+#   np.isnan(ds_gt.position.data[:,0,0]*ds_gt.position.data[:,0,1]
+#  ).astype(int))
+input = [0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1]
+len_per_chunk = [
+    (key, len(list(group_iter)))
+    for key, group_iter in itertools.groupby(input)
+]
+len_per_chunk_with_1 = [
+    len(list(group_iter))
+    for key, group_iter in itertools.groupby(input)
+    if key == 1
+]

From d956aae6fd6ab3e75a56bace5b48ab4cec307a37 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 12 Sep 2024 11:42:08 +0100
Subject: [PATCH 02/25] Draft

---
 examples/load_and_explore_bboxes.py | 283 +++++++---------------------
 1 file changed, 72 insertions(+), 211 deletions(-)

diff --git a/examples/load_and_explore_bboxes.py b/examples/load_and_explore_bboxes.py
index 000dc561..b6443418 100644
--- a/examples/load_and_explore_bboxes.py
+++ b/examples/load_and_explore_bboxes.py
@@ -1,233 +1,94 @@
-"""Inspect crab trajectories"""
+"""Load and explore bboxes tracks
+===============================
 
-# %%
-import ast
-import itertools
-from pathlib import Path
+Load and explore an example dataset of bounding boxes tracks.
+"""
 
-import matplotlib.pyplot as plt
-import numpy as np
-import pandas as pd
+# %%
+# Imports
+# -------
+# For interactive plots: install ipympl with `pip install ipympl` and uncomment
+# the following line in your notebook
+# %matplotlib widget
 from cycler import cycler
+from matplotlib import pyplot as plt
 
+from movement import sample_data
 from movement.io import load_bboxes
 
-# %matplotlib widget
-
-# %%%%%%%%%%%%%%%%%%%%%
-# input data
-file_csv = (
-    "/Users/sofia/arc/project_Zoo_crabs/escape_clips/"
-    "crabs_track_output_selected_clips/04.09.2023-04-Right_RE_test/predicted_tracks.csv"
-)
-
-
-# load ground truth!
-groundtruth_csv = (
-    "/Users/sofia/arc/project_Zoo_crabs/escape_clips/"
-    "04.09.2023-04-Right_RE_test_corrected_ST_csv_SM.csv"
-)
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%
-# Fix ground truth file
-df = pd.read_csv(groundtruth_csv, sep=",", header=0)
-
-# find duplicates
-list_unique_filenames = list(set(df.filename))
-filenames_to_rep_ID = {}
-for file in list_unique_filenames:
-    df_one_filename = df.loc[df["filename"] == file]
-
-    list_track_ids_one_filename = [
-        int(ast.literal_eval(row.region_attributes)["track"])
-        for row in df_one_filename.itertuples()
-    ]
-
-    if len(set(list_track_ids_one_filename)) != len(
-        list_track_ids_one_filename
-    ):
-        # [
-        #     list_track_ids_one_filename.remove(k)
-        #     for k in set(list_track_ids_one_filename)
-        # ]  # there could be more than one duplicate!!!
-        for k in set(list_track_ids_one_filename):
-            list_track_ids_one_filename.remove(k)  # remove first occurrence
-
-        filenames_to_rep_ID[file] = list_track_ids_one_filename
-
-# delete duplicate rows
-for file, list_rep_ID in filenames_to_rep_ID.items():
-    for rep_ID in list_rep_ID:
-        # find repeated rows for selected file and rep_ID
-        matching_rows = df[
-            (df["filename"] == file)
-            & (df["region_attributes"] == f'{{"track":"{rep_ID}"}}')
-        ]
-
-        # Identify the index of the first matching row
-        if not matching_rows.empty:
-            indices_to_drop = matching_rows.index[1:]
-
-            # Drop all but the first matching row
-            df = df.drop(indices_to_drop)
-
-# save to csv
-groundtruth_csv_corrected = Path(groundtruth_csv).parent / Path(
-    Path(groundtruth_csv).stem + "_corrected.csv"
-)
-df.to_csv(groundtruth_csv_corrected, index=False)
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Read corrected ground truth as movement dataset
-ds_gt = load_bboxes.from_via_tracks_file(
-    groundtruth_csv_corrected, fps=None, use_frame_numbers_from_file=False
-)
-print(ds_gt)
-
-# Print summary
-print(f"{ds_gt.source_file}")
-print(f"Number of frames: {ds_gt.sizes['time']}")
-print(f"Number of individuals: {ds_gt.sizes['individuals']}")
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Read predictions as movement dataset
-ds_pred = load_bboxes.from_via_tracks_file(
-    file_csv, fps=None, use_frame_numbers_from_file=False
-)
-print(ds_pred)
-
-# Print summary
-print(f"{ds_pred.source_file}")
-print(f"Number of frames: {ds_pred.sizes['time']}")
-print(f"Number of individuals: {ds_pred.sizes['individuals']}")
-
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Check when individuals are labelled
-# check x and y coordinates are nan at the same locations
-# TODO: change colormap to white and blue
-assert (
-    np.isnan(ds_gt.position.data[:, :, 0])
-    == np.isnan(ds_gt.position.data[:, :, 1])
-).all()
-
-fig, axs = plt.subplots(2, 1, sharex=True)
-
-axs[0].matshow(np.isnan(ds_gt.position.data[:, :, 0]).T, aspect="auto")
-axs[0].set_title("Ground truth")
-axs[0].set_xlabel("time (frames)")
-axs[0].set_ylabel("individual")
-
-axs[1].matshow(np.isnan(ds_pred.position.data[:, :, 0]).T, aspect="auto")
-axs[1].set_title("Prediction")
-axs[1].set_xlabel("time (frames)")
-axs[1].set_ylabel("tracks")
-axs[1].xaxis.tick_bottom()
-
-# # add reference
-# axs[1].hlines(
-#     y=ds_gt.sizes["individuals"],
-#     xmin=0,
-#     xmax=ds_gt.sizes["time"] - 1,
-#     color="red",
-# )
-
-fig.subplots_adjust(hspace=0.6, wspace=0.5)
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Compare lengths of continuous tracks & plot distrib
-
-# for each individual, find the length of chunks between nans
-map_individuals_to_chunks = {}
-for individual in range(ds_gt.sizes["individuals"]):
-    # find nans in x-coord for that individual
-    nan_idx = np.isnan(ds_gt.position.data[:, individual, 0])
-
-    # find lengths of continuous tracks
-    len_chunks = [
-        len(list(group_iter))
-        for key, group_iter in itertools.groupby(nan_idx)
-        if not key
-    ]
-
-    map_individuals_to_chunks[individual] = len_chunks
-
 # %%
-fig, ax = plt.subplots(1, 1)
-for ind, list_chunks in map_individuals_to_chunks.items():
-    ax.scatter([ind] * len(list_chunks), list_chunks)
+# Select sample data file
+# --------------------
+# For the sake of this example, we will use the path to one of
+# the sample datasets provided with ``movement``.
 
+file_path = sample_data.fetch_dataset_paths(
+    "VIA_multiple-crabs_5-frames_labels.csv"
+)["bboxes"]
+print(file_path)
 
-# [sum(1 for _ in input) for _, input in itertools.groupby(_)]
+# %%
+# Read file as a `movement` dataset
+# ----------------------------------
+ds = load_bboxes.from_via_tracks_file(file_path)
 
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Check confidence of detections
-confidence_values = ds_pred.confidence.data.flatten()
-nan_median_confidence = np.nanmedian(confidence_values)
+# print some information about the dataset
+print(ds)
+print("-----")
+print(f"Number of individuals: {ds.sizes['individuals']}")
+print(f"Number of frames: {ds.sizes['time']}")
 
 
-fig, ax = plt.subplots(1, 1)
-hist = ax.hist(confidence_values, bins=np.arange(0, 1.01, 0.05))
-ax.vlines(x=nan_median_confidence, ymin=0, ymax=max(hist[0]), color="red")
-ax.set_aspect("auto")
+# %%
+# The dataset contains bounding boxes for 86 individuals, tracked for
+# 5 frames, in the xy plane.
+#
+# We can also see from the printout of the dataset that it contains
+# three data arrays: ``position``, ``shape`` and ``confidence``.
+#
+# We will use these three arrays in the following sections to produce
+# informative plots of the tracked trajectories
+# %%
+# Plot trajectories and color by individual
+# -----------------------------------------
 
-fig, ax = plt.subplots(1, 1)
-ax.hist(ds_pred.confidence.data.flatten(), bins=np.arange(0.6, 1.01, 0.01))
-ax.vlines(x=nan_median_confidence, ymin=0, ymax=max(hist[0]), color="red")
-ax.set_aspect("auto")
+fig, ax = plt.subplots(1, 1)  # , figsize=(15, 15))
 
-print(f"Median confidence: {nan_median_confidence}")
+# add color cycler to axes
+plt.rcParams["axes.prop_cycle"] = cycler(color=plt.get_cmap("tab10").colors)
+# get the list of colors in the cycle
+color_cycle = plt.rcParams["axes.prop_cycle"].by_key()["color"]
 
-# %%
-# plot all trajectories
-# ds.position ---> time, individuals, space
-# why noise? remove low predictions?
 
-for ds, title in zip(
-    [ds_gt, ds_pred], ["Ground truth", "Prediction"], strict=False
-):
-    # cmap = plt.get_cmap('tab10')
-    fig, ax = plt.subplots(1, 1)
-    plt.rcParams["axes.prop_cycle"] = cycler(
-        color=plt.get_cmap("tab10").colors
+for id_idx, id_str in enumerate(ds["individuals"].data):
+    ax.scatter(
+        x=ds.position.sel(individuals=id_str, space="x").data,
+        y=ds.position.sel(individuals=id_str, space="y").data,
+        s=1,
+        color=color_cycle[id_idx % len(color_cycle)],
+    )
+    # find first frame with non-nan x-coord
+    start_frame = ds.time[
+        ~ds.position.sel(individuals="id_1", space="y").isnull().data
+    ][0]
+    ax.text(
+        x=ds.position.sel(
+            time=start_frame, individuals=id_str, space="x"
+        ).data,
+        y=ds.position.sel(
+            time=start_frame, individuals=id_str, space="y"
+        ).data,
+        s=str(id_str),
+        horizontalalignment="center",
+        color=color_cycle[id_idx % len(color_cycle)],
     )
 
-    for ind_idx in range(ds.sizes["individuals"]):
-        ax.scatter(
-            x=ds.position[:, ind_idx, 0],  # nframes, nindividuals, x
-            y=ds.position[:, ind_idx, 1],
-            s=1,
-            # c=cmap(ind_idx),
-        )
-    ax.set_aspect("equal")
-    ax.set_ylim(-150, 2500)
-    ax.set_xlabel("x (pixels)")
-    ax.set_ylabel("y (pixels)")
-    ax.set_title(title)
-    plt.show()
-
-# %%
-# first 10 individuals
-fig, ax = plt.subplots(1, 1)
-
-ax.scatter(x=ds_pred.position[:, :10, 0], y=ds_pred.position[:, :10, 1], s=1)
+ax.invert_yaxis()  # OJO!
+# ax.set_ylim(0, 2160)
+# ax.set_xlim(0, 4096)
 ax.set_aspect("equal")
 ax.set_xlabel("x (pixels)")
 ax.set_ylabel("y (pixels)")
+plt.show()
+
 # %%
-# groupby
-# It generates a break or new group every time the value of the key function
-# changes
-# input = (
-#   np.isnan(ds_gt.position.data[:,0,0]*ds_gt.position.data[:,0,1]
-#  ).astype(int))
-input = [0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1]
-len_per_chunk = [
-    (key, len(list(group_iter)))
-    for key, group_iter in itertools.groupby(input)
-]
-len_per_chunk_with_1 = [
-    len(list(group_iter))
-    for key, group_iter in itertools.groupby(input)
-    if key == 1
-]

From 2cf0cb847c3941d6963873300f64cbe81a8e2827 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 12 Sep 2024 13:03:57 +0100
Subject: [PATCH 03/25] Sherlock example

---
 examples/load_and_explore_bboxes_sherlock.py | 141 +++++++++++++++++++
 1 file changed, 141 insertions(+)
 create mode 100644 examples/load_and_explore_bboxes_sherlock.py

diff --git a/examples/load_and_explore_bboxes_sherlock.py b/examples/load_and_explore_bboxes_sherlock.py
new file mode 100644
index 00000000..30bf39be
--- /dev/null
+++ b/examples/load_and_explore_bboxes_sherlock.py
@@ -0,0 +1,141 @@
+"""Load and explore bboxes tracks
+===============================
+
+Load and explore an example dataset of bounding boxes tracks.
+"""
+
+# %%
+# Imports
+# -------
+# For interactive plots: install ipympl with `pip install ipympl` and uncomment
+# the following line in your notebook
+# %matplotlib widget
+from pathlib import Path
+
+from cycler import cycler
+from matplotlib import pyplot as plt
+
+from movement.io import load_bboxes
+
+# %%
+# Select sample data file
+# -----------------------
+# - Download data from https://www.robots.ox.ac.uk/~vgg/software/via/docs/face_track_annotation.html
+# - Edit name of files to include `frame_`
+
+# %%
+# Read file as a `movement` dataset
+# ----------------------------------
+file_path = (
+    Path.home()
+    / "Downloads"
+    / "face_track_annotation"
+    / "data"
+    / "sherlock_ep01_tracks_FRCNN_SM.csv"
+)
+ds = load_bboxes.from_via_tracks_file(
+    str(file_path), use_frame_numbers_from_file=True
+)
+
+# restrict to first shot only
+start_end_frames_shot_1 = (11384, 11586)
+frames_shot_1 = list(
+    range(start_end_frames_shot_1[0], start_end_frames_shot_1[1], 1)
+)
+ds = ds.sel(time=frames_shot_1).copy()  # I think I need copy?
+
+# remove individuals whose position is nan for all frames in the shot
+# bool_individuals_all_nan = np.all(np.isnan(ds.position.data), axis=(0, 2))
+# ds = ds.drop_sel(individuals=ds.individuals.data[bool_individuals_all_nan])
+ds = ds.dropna(dim="individuals", how="all")
+
+# print some information about the dataset
+print(ds)
+print("-----")
+print(f"Number of individuals: {ds.sizes['individuals']}")
+print(f"Number of frames: {ds.sizes['time']}")
+
+
+# %%
+# The reduced dataset contains bounding boxes for 2 individuals, tracked for
+# 202 frames, in the xy plane.
+#
+# We can also see from the printout of the dataset that it contains
+# three data arrays: ``position``, ``shape`` and ``confidence``.
+# %%
+# Plot trajectories of first shot and color by individual
+# -------------------------------------------------------
+
+fig, ax = plt.subplots(1, 1)
+
+# add color cycler to axes
+plt.rcParams["axes.prop_cycle"] = cycler(color=plt.get_cmap("tab10").colors)
+# get the list of colors in the cycle
+color_cycle = plt.rcParams["axes.prop_cycle"].by_key()["color"]
+
+
+frame_number = frames_shot_1[0]
+img = plt.imread(
+    str(file_path.parents[1] / "images" / f"{frame_number:08}.jpg")
+)
+
+# plot first frame once: the image is the same for every individual
+ax.imshow(img)
+# split the shot's frames around the displayed one (individual-independent)
+past_frames = [f for f in frames_shot_1 if f <= frame_number]
+future_frames = [f for f in frames_shot_1 if f > frame_number]
+
+for id_idx, id_str in enumerate(ds["individuals"].data):
+    # plot past position of centroid in grey
+    ax.scatter(
+        x=ds.position.sel(
+            individuals=id_str, time=past_frames, space="x"
+        ).data,
+        y=ds.position.sel(
+            individuals=id_str, time=past_frames, space="y"
+        ).data,
+        s=1,
+        color="grey",
+    )
+    # plot future trajectories of centroids in color
+    ax.scatter(
+        x=ds.position.sel(
+            individuals=id_str, time=future_frames, space="x"
+        ).data,
+        y=ds.position.sel(
+            individuals=id_str, time=future_frames, space="y"
+        ).data,
+        s=1,
+        color=color_cycle[id_idx % len(color_cycle)],
+    )
+    # plot bbox in this frame
+    # ATT! currently position is the top left corner of bbox
+    # need to uncomment the line below if position loaded is centroid
+    top_left_corner = (
+        ds.position.sel(individuals=id_str, time=frame_number).data
+        # - ds.shape.sel(individuals=id_str, time=frame_number).data / 2
+    )
+    bbox = plt.Rectangle(
+        xy=tuple(top_left_corner),
+        width=ds.shape.sel(
+            individuals=id_str, time=frame_number, space="x"
+        ).data,
+        height=ds.shape.sel(
+            individuals=id_str, time=frame_number, space="y"
+        ).data,
+        edgecolor=color_cycle[id_idx % len(color_cycle)],
+        facecolor="none",  # transparent fill
+        linewidth=1.5,
+    )
+    ax.add_patch(bbox)
+
+
+# ax.legend(ds["individuals"].data, bbox_to_anchor=(1.0, 1.0))
+# ax.invert_yaxis()
+ax.set_aspect("equal")
+ax.set_xlabel("x (pixels)")
+ax.set_ylabel("y (pixels)")
+ax.set_title(f"Sherlock - shot 1, frame {frame_number}")
+plt.show()
+
+# %%

From b847f2e2b1840eac2a42747b463de3cfece5cc0c Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 12 Sep 2024 14:21:39 +0100
Subject: [PATCH 04/25] Draft example for MOCA clip

---
 examples/load_and_explore_bboxes_moca.py | 137 +++++++++++++++++++++++
 1 file changed, 137 insertions(+)
 create mode 100644 examples/load_and_explore_bboxes_moca.py

diff --git a/examples/load_and_explore_bboxes_moca.py b/examples/load_and_explore_bboxes_moca.py
new file mode 100644
index 00000000..ae69a7ca
--- /dev/null
+++ b/examples/load_and_explore_bboxes_moca.py
@@ -0,0 +1,137 @@
+"""Load and explore bboxes tracks
+===============================
+
+Load and explore an example dataset of bounding boxes tracks.
+"""
+
+# %%
+# Imports
+# -------
+# For interactive plots: install ipympl with `pip install ipympl` and uncomment
+# the following line in your notebook
+# %matplotlib widget
+from pathlib import Path
+
+from cycler import cycler
+from matplotlib import pyplot as plt
+
+from movement.io import load_bboxes
+
+# %%
+# Select sample data file
+# -----------------------
+# - Download data from https://www.robots.ox.ac.uk/~vgg/software/via/docs/face_track_annotation.html
+# - Edit name of files to include `frame_`
+
+# %%
+# Read file as a `movement` dataset
+# ----------------------------------
+file_path = (
+    Path.home()
+    / "swc"
+    / "project_movement_dataloader"
+    / "bboxes-datasets"
+    / "MoCA"
+    / "JPEGImages"
+    / "moca_crab_1_clip.csv"
+)
+img_dir = file_path.parent / "crab_1"
+
+# %%
+ds = load_bboxes.from_via_tracks_file(
+    str(file_path),
+    use_frame_numbers_from_file=False,
+    # ATT! extracted frames are not consecutive!
+)
+
+# print some information about the dataset
+print(ds)
+print("-----")
+print(f"Number of individuals: {ds.sizes['individuals']}")
+print(f"Number of frames: {ds.sizes['time']}")
+
+
+# %%
+# The printout above reports the number of individuals and frames in the
+# clip; the bounding boxes are tracked in the xy plane.
+#
+# We can also see from the printout of the dataset that it contains
+# three data arrays: ``position``, ``shape`` and ``confidence``.
+# %%
+# Plot trajectories and color by individual
+# -----------------------------------------
+
+fig, ax = plt.subplots(1, 1)
+
+# add color cycler to axes
+plt.rcParams["axes.prop_cycle"] = cycler(color=plt.get_cmap("tab10").colors)
+# get the list of colors in the cycle
+color_cycle = plt.rcParams["axes.prop_cycle"].by_key()["color"]
+
+
+frame_number = 0  # ATT! extracted frames are not consecutive!
+img = plt.imread(str(img_dir / f"{frame_number:05}.jpg"))
+
+# plot frame once: the image is the same for every individual
+ax.imshow(img)
+# split the dataset's frames around the displayed one (individual-independent)
+past_frames = [f for f in ds.time.data if f <= frame_number]
+future_frames = [f for f in ds.time.data if f > frame_number]
+
+for id_idx, id_str in enumerate(ds["individuals"].data):
+    # plot past position of centroid in grey
+    ax.scatter(
+        x=ds.position.sel(
+            individuals=id_str, time=past_frames, space="x"
+        ).data,
+        y=ds.position.sel(
+            individuals=id_str, time=past_frames, space="y"
+        ).data,
+        s=1,
+        color="grey",
+    )
+
+    # plot future trajectories of centroids in color
+    ax.scatter(
+        x=ds.position.sel(
+            individuals=id_str, time=future_frames, space="x"
+        ).data,
+        y=ds.position.sel(
+            individuals=id_str, time=future_frames, space="y"
+        ).data,
+        s=1,
+        color=color_cycle[id_idx % len(color_cycle)],
+    )
+
+    # plot bbox in this frame
+    # ATT! currently position is the top left corner of bbox
+    # need to uncomment the line below if position loaded is centroid
+    # (after fix)
+    top_left_corner = (
+        ds.position.sel(individuals=id_str, time=frame_number).data
+        # - ds.shape.sel(individuals=id_str, time=frame_number).data / 2
+    )
+    bbox = plt.Rectangle(
+        xy=tuple(top_left_corner),
+        width=ds.shape.sel(
+            individuals=id_str, time=frame_number, space="x"
+        ).data,
+        height=ds.shape.sel(
+            individuals=id_str, time=frame_number, space="y"
+        ).data,
+        edgecolor=color_cycle[id_idx % len(color_cycle)],
+        facecolor="none",  # transparent fill
+        linewidth=1.5,
+    )
+    ax.add_patch(bbox)
+
+
+# ax.legend(ds["individuals"].data, bbox_to_anchor=(1.0, 1.0))
+# ax.invert_yaxis()
+ax.set_aspect("equal")
+ax.set_xlabel("x (pixels)")
+ax.set_ylabel("y (pixels)")
+ax.set_title(f"MoCA {img_dir}, frame {frame_number}")
+plt.show()
+
+# %%

From 2f236f5995ed1e1ac7b4f88cd144c677321e340a Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Tue, 17 Sep 2024 16:29:58 +0100
Subject: [PATCH 05/25] Add reindex and interpolate example

---
 examples/reindex_and_interpolate.py | 77 +++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)
 create mode 100644 examples/reindex_and_interpolate.py

diff --git a/examples/reindex_and_interpolate.py b/examples/reindex_and_interpolate.py
new file mode 100644
index 00000000..7c211865
--- /dev/null
+++ b/examples/reindex_and_interpolate.py
@@ -0,0 +1,77 @@
+"""Reindex and interpolate bboxes tracks
+=====================================
+
+Reindex a bboxes dataset to every frame and interpolate the missing values.
+"""
+
+# %%
+from movement import sample_data
+from movement.filtering import interpolate_over_time
+from movement.io import load_bboxes
+
+# %%
+# Select sample data file
+# -----------------------
+# For the sake of this example, we will use the path to one of
+# the sample datasets provided with ``movement``.
+
+file_path = sample_data.fetch_dataset_paths("VIA_single-crab_MOCA-crab-1.csv")[
+    "bboxes"
+]
+print(file_path)
+
+ds = load_bboxes.from_via_tracks_file(
+    file_path, use_frame_numbers_from_file=True
+)
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Only 1 in 5 frames are labelled!
+print(ds)
+print(ds.time)
+print(ds.position.data[:, 0, :])
+
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Extend to every frame, last labelled frame included, by forward filling
+ds_ff = ds.reindex(
+    {"time": list(range(ds.time[-1].item() + 1))},  # +1: keep last frame
+    method="ffill",  # propagate last valid index value forward
+)
+
+print(ds_ff.position.data[:, 0, :])
+print(ds_ff.shape.data[:, 0, :])
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Extend to every frame, last labelled frame included, filling gaps with nan
+ds_nan = ds.reindex(
+    {"time": list(range(ds.time[-1].item() + 1))},  # +1: keep last frame
+    method=None,  # default
+)
+
+print("Position data array:")
+print(ds_nan.position.data[:11, 0, :])
+
+print("Shape data array:")
+print(ds_nan.shape.data[:11, 0, :])
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Linearly interpolate position and shape with nan
+
+ds_interp = ds_nan.copy()
+
+for data_array_str in ["position", "shape"]:
+    ds_interp[data_array_str] = interpolate_over_time(
+        data=ds_interp[data_array_str],
+        method="linear",
+        max_gap=None,
+        print_report=False,
+    )
+
+print("Position data array:")
+print(ds_interp.position.data[:11, 0, :])
+
+print("Shape data array:")
+print(ds_interp.shape.data[:11, 0, :])
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Export as csv file

From fcee8be5e6955816c6aef7cea195b87f5dc4bc6f Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Mon, 2 Dec 2024 10:58:02 +0000
Subject: [PATCH 06/25] Remove multi crabs example

---
 examples/load_and_explore_bboxes.py | 94 -----------------------------
 1 file changed, 94 deletions(-)
 delete mode 100644 examples/load_and_explore_bboxes.py

diff --git a/examples/load_and_explore_bboxes.py b/examples/load_and_explore_bboxes.py
deleted file mode 100644
index b6443418..00000000
--- a/examples/load_and_explore_bboxes.py
+++ /dev/null
@@ -1,94 +0,0 @@
-"""Load and explore bboxes tracks
-===============================
-
-Load and explore an example dataset of bounding boxes tracks.
-"""
-
-# %%
-# Imports
-# -------
-# For interactive plots: install ipympl with `pip install ipympl` and uncomment
-# the following line in your notebook
-# %matplotlib widget
-from cycler import cycler
-from matplotlib import pyplot as plt
-
-from movement import sample_data
-from movement.io import load_bboxes
-
-# %%
-# Select sample data file
-# --------------------
-# For the sake of this example, we will use the path to one of
-# the sample datasets provided with ``movement``.
-
-file_path = sample_data.fetch_dataset_paths(
-    "VIA_multiple-crabs_5-frames_labels.csv"
-)["bboxes"]
-print(file_path)
-
-# %%
-# Read file as a `movement` dataset
-# ----------------------------------
-ds = load_bboxes.from_via_tracks_file(file_path)
-
-# print some information about the dataset
-print(ds)
-print("-----")
-print(f"Number of individuals: {ds.sizes['individuals']}")
-print(f"Number of frames: {ds.sizes['time']}")
-
-
-# %%
-# The dataset contains bounding boxes for 86 individuals, tracked for
-# 5 frames, in the xy plane.
-#
-# We can also see from the printout of the dataset that it contains
-# three data arrays: ``position``, ``shape`` and ``confidence``.
-#
-# We will use these three arrays in the following sections to produce
-# informative plots of the tracked trajectories
-# %%
-# Plot trajectories and color by individual
-# -----------------------------------------
-
-fig, ax = plt.subplots(1, 1)  # , figsize=(15, 15))
-
-# add color cycler to axes
-plt.rcParams["axes.prop_cycle"] = cycler(color=plt.get_cmap("tab10").colors)
-# get the list of colors in the cycle
-color_cycle = plt.rcParams["axes.prop_cycle"].by_key()["color"]
-
-
-for id_idx, id_str in enumerate(ds["individuals"].data):
-    ax.scatter(
-        x=ds.position.sel(individuals=id_str, space="x").data,
-        y=ds.position.sel(individuals=id_str, space="y").data,
-        s=1,
-        color=color_cycle[id_idx % len(color_cycle)],
-    )
-    # find first frame with non-nan x-coord
-    start_frame = ds.time[
-        ~ds.position.sel(individuals="id_1", space="y").isnull().data
-    ][0]
-    ax.text(
-        x=ds.position.sel(
-            time=start_frame, individuals=id_str, space="x"
-        ).data,
-        y=ds.position.sel(
-            time=start_frame, individuals=id_str, space="y"
-        ).data,
-        s=str(id_str),
-        horizontalalignment="center",
-        color=color_cycle[id_idx % len(color_cycle)],
-    )
-
-ax.invert_yaxis()  # OJO!
-# ax.set_ylim(0, 2160)
-# ax.set_xlim(0, 4096)
-ax.set_aspect("equal")
-ax.set_xlabel("x (pixels)")
-ax.set_ylabel("y (pixels)")
-plt.show()
-
-# %%

From 8662e157e14df7328338aba770a8e77e45f92155 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Mon, 2 Dec 2024 10:58:11 +0000
Subject: [PATCH 07/25] Fix sherlock example

---
 examples/load_and_explore_bboxes_sherlock.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/load_and_explore_bboxes_sherlock.py b/examples/load_and_explore_bboxes_sherlock.py
index 30bf39be..559d59c9 100644
--- a/examples/load_and_explore_bboxes_sherlock.py
+++ b/examples/load_and_explore_bboxes_sherlock.py
@@ -113,7 +113,7 @@
     # need to uncomment the line below if position loaded is centroid
     top_left_corner = (
         ds.position.sel(individuals=id_str, time=frame_number).data
-        # - ds.shape.sel(individuals=id_str, time=frame_number).data / 2
+        - ds.shape.sel(individuals=id_str, time=frame_number).data / 2
     )
     bbox = plt.Rectangle(
         xy=tuple(top_left_corner),

From d2c48ca249e4dd9f4e92ee4158080b3a597926b0 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Mon, 2 Dec 2024 10:58:26 +0000
Subject: [PATCH 08/25] Remove sherlock example

---
 examples/load_and_explore_bboxes_sherlock.py | 141 -------------------
 1 file changed, 141 deletions(-)
 delete mode 100644 examples/load_and_explore_bboxes_sherlock.py

diff --git a/examples/load_and_explore_bboxes_sherlock.py b/examples/load_and_explore_bboxes_sherlock.py
deleted file mode 100644
index 559d59c9..00000000
--- a/examples/load_and_explore_bboxes_sherlock.py
+++ /dev/null
@@ -1,141 +0,0 @@
-"""Load and explore bboxes tracks
-===============================
-
-Load and explore an example dataset of bounding boxes tracks.
-"""
-
-# %%
-# Imports
-# -------
-# For interactive plots: install ipympl with `pip install ipympl` and uncomment
-# the following line in your notebook
-# %matplotlib widget
-from pathlib import Path
-
-from cycler import cycler
-from matplotlib import pyplot as plt
-
-from movement.io import load_bboxes
-
-# %%
-# Select sample data file
-# --------------------
-# - Download data from https://www.robots.ox.ac.uk/~vgg/software/via/docs/face_track_annotation.html
-# - Edit name of files to include `frame_`
-
-# %%
-# Read file as a `movement` dataset
-# ----------------------------------
-file_path = (
-    Path.home()
-    / "Downloads"
-    / "face_track_annotation"
-    / "data"
-    / "sherlock_ep01_tracks_FRCNN_SM.csv"
-)
-ds = load_bboxes.from_via_tracks_file(
-    str(file_path), use_frame_numbers_from_file=True
-)
-
-# restrict to first shot only
-start_end_frames_shot_1 = (11384, 11586)
-frames_shot_1 = list(
-    range(start_end_frames_shot_1[0], start_end_frames_shot_1[1], 1)
-)
-ds = ds.sel(time=frames_shot_1).copy()  # I think I need copy?
-
-# remove individuals whose position is nan for all frames in the shot
-# bool_individuals_all_nan = np.all(np.isnan(ds.position.data), axis=(0, 2))
-# ds = ds.drop_sel(individuals=ds.individuals.data[bool_individuals_all_nan])
-ds = ds.dropna(dim="individuals", how="all")
-
-# print some information about the dataset
-print(ds)
-print("-----")
-print(f"Number of individuals: {ds.sizes['individuals']}")
-print(f"Number of frames: {ds.sizes['time']}")
-
-
-# %%
-# The reduced dataset contains bounding boxes for 2 individuals, tracked for
-# 202 frames, in the xy plane.
-#
-# We can also see from the printout of the dataset that it contains
-# three data arrays: ``position``, ``shape`` and ``confidence``.
-# %%
-# Plot trajectories of first shot and color by individual
-# -----------------------------------------
-
-fig, ax = plt.subplots(1, 1)
-
-# add color cycler to axes
-plt.rcParams["axes.prop_cycle"] = cycler(color=plt.get_cmap("tab10").colors)
-# get the list of colors in the cycle
-color_cycle = plt.rcParams["axes.prop_cycle"].by_key()["color"]
-
-
-frame_number = frames_shot_1[0]
-img = plt.imread(
-    str(file_path.parents[1] / "images" / f"{frame_number:08}.jpg")
-)
-
-for id_idx, id_str in enumerate(ds["individuals"].data):
-    # plot first frame
-    ax.imshow(img)
-
-    past_frames = [f for f in frames_shot_1 if f <= frame_number]
-    future_frames = [f for f in frames_shot_1 if f > frame_number]
-
-    # plot past position of centroid in grey
-    ax.scatter(
-        x=ds.position.sel(
-            individuals=id_str, time=past_frames, space="x"
-        ).data,
-        y=ds.position.sel(
-            individuals=id_str, time=past_frames, space="y"
-        ).data,
-        s=1,
-        color="grey",
-    )
-    # plot future trajectories of centroids in color
-    ax.scatter(
-        x=ds.position.sel(
-            individuals=id_str, time=future_frames, space="x"
-        ).data,
-        y=ds.position.sel(
-            individuals=id_str, time=future_frames, space="y"
-        ).data,
-        s=1,
-        color=color_cycle[id_idx % len(color_cycle)],
-    )
-    # plot bbox in this frame
-    # ATT! currently position is the top left corner of bbox
-    # need to uncomment the line below if position loaded is centroid
-    top_left_corner = (
-        ds.position.sel(individuals=id_str, time=frame_number).data
-        - ds.shape.sel(individuals=id_str, time=frame_number).data / 2
-    )
-    bbox = plt.Rectangle(
-        xy=tuple(top_left_corner),
-        width=ds.shape.sel(
-            individuals=id_str, time=frame_number, space="x"
-        ).data,
-        height=ds.shape.sel(
-            individuals=id_str, time=frame_number, space="y"
-        ).data,
-        edgecolor=color_cycle[id_idx % len(color_cycle)],
-        facecolor="none",  # transparent fill
-        linewidth=1.5,
-    )
-    ax.add_patch(bbox)
-
-
-# ax.legend(ds["individuals"].data, bbox_to_anchor=(1.0, 1.0))
-# ax.invert_yaxis()
-ax.set_aspect("equal")
-ax.set_xlabel("x (pixels)")
-ax.set_ylabel("y (pixels)")
-ax.set_title(f"Sherlock - shot 1, frame {frame_number}")
-plt.show()
-
-# %%

From 39d3960ac1c37e7d983a7d668780add900a7123e Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Mon, 2 Dec 2024 11:18:15 +0000
Subject: [PATCH 09/25] Fix moca clip centroid vs top left corner

---
 examples/load_and_explore_bboxes_moca.py | 155 +++++++++++------------
 1 file changed, 73 insertions(+), 82 deletions(-)

diff --git a/examples/load_and_explore_bboxes_moca.py b/examples/load_and_explore_bboxes_moca.py
index ae69a7ca..ac32d1df 100644
--- a/examples/load_and_explore_bboxes_moca.py
+++ b/examples/load_and_explore_bboxes_moca.py
@@ -10,34 +10,25 @@
 # For interactive plots: install ipympl with `pip install ipympl` and uncomment
 # the following line in your notebook
 # %matplotlib widget
-from pathlib import Path
+# from pathlib import Path
 
 from cycler import cycler
 from matplotlib import pyplot as plt
 
+from movement import sample_data
 from movement.io import load_bboxes
 
 # %%
 # Select sample data file
 # --------------------
-# - Download data from https://www.robots.ox.ac.uk/~vgg/software/via/docs/face_track_annotation.html
-# - Edit name of files to include `frame_`
+file_path = sample_data.fetch_dataset_paths("VIA_single-crab_MOCA-crab-1.csv")[
+    "bboxes"
+]
+print(file_path)
 
 # %%
 # Read file as a `movement` dataset
 # ----------------------------------
-file_path = (
-    Path.home()
-    / "swc"
-    / "project_movement_dataloader"
-    / "bboxes-datasets"
-    / "MoCA"
-    / "JPEGImages"
-    / "moca_crab_1_clip.csv"
-)
-img_dir = file_path.parent / "crab_1"
-
-# %%
 ds = load_bboxes.from_via_tracks_file(
     str(file_path),
     use_frame_numbers_from_file=False,
@@ -52,14 +43,14 @@
 
 
 # %%
-# The reduced dataset contains bounding boxes for 2 individuals, tracked for
-# 202 frames, in the xy plane.
+# The dataset contains bounding boxes for 1 individual, tracked for
+# 35 frames, in the xy plane.
 #
 # We can also see from the printout of the dataset that it contains
 # three data arrays: ``position``, ``shape`` and ``confidence``.
 # %%
 # Plot trajectories of first shot and color by individual
-# -----------------------------------------
+# -------------------------------------------------------
 
 fig, ax = plt.subplots(1, 1)
 
@@ -69,69 +60,69 @@
 color_cycle = plt.rcParams["axes.prop_cycle"].by_key()["color"]
 
 
-frame_number = 0  # ATT! extracted frames are not consecutive!
-img = plt.imread(str(img_dir / f"{frame_number:05}.jpg"))
-
-for id_idx, id_str in enumerate(ds["individuals"].data):
-    # plot frame
-    ax.imshow(img)
-
-    past_frames = [f for f in ds.time.data if f <= frame_number]
-    future_frames = [f for f in ds.time.data if f > frame_number]
-
-    # plot past position of centroid in grey
-    ax.scatter(
-        x=ds.position.sel(
-            individuals=id_str, time=past_frames, space="x"
-        ).data,
-        y=ds.position.sel(
-            individuals=id_str, time=past_frames, space="y"
-        ).data,
-        s=1,
-        color="grey",
-    )
-
-    # plot future trajectories of centroids in color
-    ax.scatter(
-        x=ds.position.sel(
-            individuals=id_str, time=future_frames, space="x"
-        ).data,
-        y=ds.position.sel(
-            individuals=id_str, time=future_frames, space="y"
-        ).data,
-        s=1,
-        color=color_cycle[id_idx % len(color_cycle)],
-    )
-
-    # plot bbox in this frame
-    # ATT! currently position is the top left corner of bbox
-    # need to uncomment the line below if position loaded is centroid
-    # (after fix)
-    top_left_corner = (
-        ds.position.sel(individuals=id_str, time=frame_number).data
-        # - ds.shape.sel(individuals=id_str, time=frame_number).data / 2
-    )
-    bbox = plt.Rectangle(
-        xy=tuple(top_left_corner),
-        width=ds.shape.sel(
-            individuals=id_str, time=frame_number, space="x"
-        ).data,
-        height=ds.shape.sel(
-            individuals=id_str, time=frame_number, space="y"
-        ).data,
-        edgecolor=color_cycle[id_idx % len(color_cycle)],
-        facecolor="none",  # transparent fill
-        linewidth=1.5,
-    )
-    ax.add_patch(bbox)
-
-
-# ax.legend(ds["individuals"].data, bbox_to_anchor=(1.0, 1.0))
-# ax.invert_yaxis()
-ax.set_aspect("equal")
-ax.set_xlabel("x (pixels)")
-ax.set_ylabel("y (pixels)")
-ax.set_title(f"MoCA {img_dir}, frame {frame_number}")
-plt.show()
+# frame_number = 0  # ATT! extracted frames are not consecutive!
+# img = plt.imread(str(img_dir / f"{frame_number:05}.jpg"))
+
+# for id_idx, id_str in enumerate(ds["individuals"].data):
+#     # plot frame
+#     ax.imshow(img)
+
+#     past_frames = [f for f in ds.time.data if f <= frame_number]
+#     future_frames = [f for f in ds.time.data if f > frame_number]
+
+#     # plot past position of centroid in grey
+#     ax.scatter(
+#         x=ds.position.sel(
+#             individuals=id_str, time=past_frames, space="x"
+#         ).data,
+#         y=ds.position.sel(
+#             individuals=id_str, time=past_frames, space="y"
+#         ).data,
+#         s=1,
+#         color="grey",
+#     )
+
+#     # plot future trajectories of centroids in color
+#     ax.scatter(
+#         x=ds.position.sel(
+#             individuals=id_str, time=future_frames, space="x"
+#         ).data,
+#         y=ds.position.sel(
+#             individuals=id_str, time=future_frames, space="y"
+#         ).data,
+#         s=1,
+#         color=color_cycle[id_idx % len(color_cycle)],
+#     )
+
+#     # plot bbox in this frame
+#     # ATT! currently position is the top left corner of bbox
+#     # need to uncomment the line below if position loaded is centroid
+#     # (after fix)
+#     top_left_corner = (
+#         ds.position.sel(individuals=id_str, time=frame_number).data
+#         - ds.shape.sel(individuals=id_str, time=frame_number).data / 2
+#     )
+#     bbox = plt.Rectangle(
+#         xy=tuple(top_left_corner),
+#         width=ds.shape.sel(
+#             individuals=id_str, time=frame_number, space="x"
+#         ).data,
+#         height=ds.shape.sel(
+#             individuals=id_str, time=frame_number, space="y"
+#         ).data,
+#         edgecolor=color_cycle[id_idx % len(color_cycle)],
+#         facecolor="none",  # transparent fill
+#         linewidth=1.5,
+#     )
+#     ax.add_patch(bbox)
+
+
+# # ax.legend(ds["individuals"].data, bbox_to_anchor=(1.0, 1.0))
+# # ax.invert_yaxis()
+# ax.set_aspect("equal")
+# ax.set_xlabel("x (pixels)")
+# ax.set_ylabel("y (pixels)")
+# ax.set_title(f"MoCA {img_dir}, frame {frame_number}")
+# plt.show()
 
 # %%

From 4174b6d35c9ce151ce9bd3644bce95c7cceba59f Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Mon, 2 Dec 2024 13:48:33 +0000
Subject: [PATCH 10/25] Add plot to reindexing example and rename

---
 examples/reindex_and_interpolate.py        |  77 --------
 examples/reindex_and_interpolate_bboxes.py | 220 +++++++++++++++++++++
 2 files changed, 220 insertions(+), 77 deletions(-)
 delete mode 100644 examples/reindex_and_interpolate.py
 create mode 100644 examples/reindex_and_interpolate_bboxes.py

diff --git a/examples/reindex_and_interpolate.py b/examples/reindex_and_interpolate.py
deleted file mode 100644
index 7c211865..00000000
--- a/examples/reindex_and_interpolate.py
+++ /dev/null
@@ -1,77 +0,0 @@
-"""Reindex and interpolate bboxes tracks
-===============================
-
-Load and explore an example dataset of bounding boxes tracks.
-"""
-
-# %%
-from movement import sample_data
-from movement.filtering import interpolate_over_time
-from movement.io import load_bboxes
-
-# %%
-# Select sample data file
-# --------------------
-# For the sake of this example, we will use the path to one of
-# the sample datasets provided with ``movement``.
-
-file_path = sample_data.fetch_dataset_paths("VIA_single-crab_MOCA-crab-1.csv")[
-    "bboxes"
-]
-print(file_path)
-
-ds = load_bboxes.from_via_tracks_file(
-    file_path, use_frame_numbers_from_file=True
-)
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Only 1 in 5 frames are labelled!
-print(ds)
-print(ds.time)
-print(ds.position.data[:, 0, :])
-
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Extend the dataset to every frame by forward filling
-ds_ff = ds.reindex(
-    {"time": list(range(ds.time[-1].item()))},
-    method="ffill",  # propagate last valid index value forward
-)
-
-print(ds_ff.position.data[:, 0, :])
-print(ds_ff.shape.data[:, 0, :])
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Extend the dataset to every frame and fill empty values with nan
-ds_nan = ds.reindex(
-    {"time": list(range(ds.time[-1].item()))},
-    method=None,  # default
-)
-
-print("Position data array:")
-print(ds_nan.position.data[:11, 0, :])
-
-print("Shape data array:")
-print(ds_nan.shape.data[:11, 0, :])
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Linearly interpolate position and shape with nan
-
-ds_interp = ds_nan.copy()
-
-for data_array_str in ["position", "shape"]:
-    ds_interp[data_array_str] = interpolate_over_time(
-        data=ds_interp[data_array_str],
-        method="linear",
-        max_gap=None,
-        print_report=False,
-    )
-
-print("Position data array:")
-print(ds_interp.position.data[:11, 0, :])
-
-print("Shape data array:")
-print(ds_interp.shape.data[:11, 0, :])
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Export as csv file
diff --git a/examples/reindex_and_interpolate_bboxes.py b/examples/reindex_and_interpolate_bboxes.py
new file mode 100644
index 00000000..cdddba8f
--- /dev/null
+++ b/examples/reindex_and_interpolate_bboxes.py
@@ -0,0 +1,220 @@
+"""Reindex and interpolate bounding boxes tracks
+===============================
+
+Load an example dataset of bounding boxes' tracks and reindex
+it to every frame.
+"""
+
+# %%
+import math
+
+import sleap_io as sio
+from cycler import cycler
+from matplotlib import pyplot as plt
+
+from movement import sample_data
+from movement.filtering import interpolate_over_time
+from movement.io import load_bboxes
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Select sample data file
+# --------------------
+# For this example, we will use the path to one of
+# the sample datasets provided with ``movement``.
+
+dataset_dict = sample_data.fetch_dataset_paths(
+    "VIA_single-crab_MOCA-crab-1.csv",
+    with_video=True,  # for visualisation
+)
+
+file_path = dataset_dict["bboxes"]
+print(file_path)
+
+ds = load_bboxes.from_via_tracks_file(
+    file_path, use_frame_numbers_from_file=True
+)
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Only 1 in 5 frames are annotated, plus the last frame (167)
+print(ds)
+print("-----")
+print(ds.time)
+
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Extend the dataset to every frame by forward filling
+# The position and shape data arrays are filled with the last valid value
+# So position and shape are kept constant when no annotation is available
+ds_ff = ds.reindex(
+    {"time": list(range(ds.time[-1].item()))},
+    method="ffill",  # propagate last valid index value forward
+)
+
+print("Position data array (first 14 frames):")
+print(ds_ff.position.data[:14, 0, :])  # time, individual, space
+
+print("----")
+print("Shape data array (first 14 frames):")
+print(ds_ff.shape.data[:14, 0, :])  # time, individual, space
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Extend the dataset to every frame and fill empty values with nan
+ds_nan = ds.reindex(
+    {"time": list(range(ds.time[-1].item()))},
+    method=None,  # default
+)
+
+print("Position data array (first 14 frames):")
+print(ds_nan.position.data[:14, 0, :])
+
+print("----")
+print("Shape data array (first 14 frames):")
+print(ds_nan.shape.data[:14, 0, :])
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Linearly interpolate position and shape with nan
+
+ds_interp = ds_nan.copy()
+
+for data_array_str in ["position", "shape"]:
+    ds_interp[data_array_str] = interpolate_over_time(
+        data=ds_interp[data_array_str],
+        method="linear",
+        max_gap=None,
+        print_report=False,
+    )
+
+print("Position data array (first 14 frames):")
+print(ds_interp.position.data[:14, 0, :])
+
+print("----")
+print("Shape data array (first 14 frames):")
+print(ds_interp.shape.data[:14, 0, :])
+
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Inspect associated video
+
+video_path = dataset_dict["video"]
+
+
+video = sio.load_video(video_path)
+
+n_frames, height, width, channels = video.shape
+
+print(f"Number of frames: {n_frames}")  # The video contains all frames
+print(f"Frame size: {width}x{height}")
+print(f"Number of channels: {channels}")
+
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Plot data
+# OJO camera movement
+
+# select indices of data to plot
+data_start_idx = 0
+data_end_idx = 11
+
+# initialise figure
+fig = plt.figure(figsize=(15, 12))
+
+# add color cycler to axes
+plt.rcParams["axes.prop_cycle"] = cycler(color=plt.get_cmap("tab10").colors)
+color_cycle = plt.rcParams["axes.prop_cycle"].by_key()["color"]
+
+# loop over data and plot over corresponding frame
+for p_i, data_idx in enumerate(range(data_start_idx, data_end_idx)):
+    # add subplot axes
+    ax = plt.subplot(math.ceil(data_end_idx / 5), 5, p_i + 1)
+
+    # plot frame
+    ax.imshow(
+        video[ds.time[data_idx].item()]
+    )  # the video is indexed at every frame! use frame number as index
+
+    # plot annotated boxes
+    top_left_corner = (
+        ds.position[data_idx, 0, :].data - ds.shape[data_idx, 0, :].data / 2
+    )
+    bbox = plt.Rectangle(
+        xy=tuple(top_left_corner),
+        width=ds.shape[data_idx, 0, 0].data,  # x coord
+        height=ds.shape[data_idx, 0, 1].data,  # y coord of shape array
+        edgecolor=color_cycle[0],  # [data_idx % len(color_cycle)],
+        facecolor="none",  # transparent fill
+        linewidth=1.5,
+    )
+    ax.add_patch(bbox)
+
+    ax.set_title(f"Frame {ds.time[data_idx].item()}")
+
+fig.tight_layout()
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Compare interpolation methods
+
+# select frames to inspect
+frame_number_start = 0
+frame_number_end = 6
+
+# add color cycler to axes
+plt.rcParams["axes.prop_cycle"] = cycler(color=plt.get_cmap("tab10").colors)
+color_cycle = plt.rcParams["axes.prop_cycle"].by_key()["color"]
+
+
+# initialise figure
+fig = plt.figure(figsize=(15, 12))
+
+
+# loop over data and plot over corresponding frame
+for frame_n in range(frame_number_start, frame_number_end):
+    # add subplot axes
+    ax = plt.subplot(1, 6, frame_n + 1)
+
+    # plot frame
+    ax.imshow(video[frame_n])
+    # the video is indexed at every frame! use frame number as index
+
+    # plot bounding box: box and centroid
+    for ds_i, ds in enumerate([ds_nan, ds_ff, ds_interp]):
+        # plot box
+        top_left_corner = (
+            ds.position.sel(time=frame_n, individuals="id_1").data
+            - ds.shape.sel(time=frame_n, individuals="id_1").data / 2
+        )
+        bbox = plt.Rectangle(
+            xy=tuple(top_left_corner),
+            width=ds.shape.sel(
+                time=frame_n, individuals="id_1", space="x"
+            ).data,  # x coord
+            height=ds.shape.sel(
+                time=frame_n, individuals="id_1", space="y"
+            ).data,  # y coord of shape array
+            edgecolor=color_cycle[ds_i],
+            facecolor="none",  # transparent fill
+            linewidth=[4.5, 1.5, 1.5][ds_i],
+            linestyle=["dotted", "solid", "solid"][ds_i],
+            label=["nan", "ffill", "linear"][ds_i],
+        )
+        ax.add_patch(bbox)
+
+        # plot centroid
+        ax.scatter(
+            x=ds.position.sel(
+                time=frame_n, individuals="id_1", space="x"
+            ).data,
+            y=ds.position.sel(
+                time=frame_n, individuals="id_1", space="y"
+            ).data,
+            s=5,
+            color=color_cycle[ds_i],
+        )
+
+    if frame_n == 0:
+        ax.legend()
+    ax.set_title(f"Frame {frame_n}")
+
+fig.tight_layout()
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Export as csv file

From 3d8331958c539435b5e90de919a4db2d5382e8b3 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Mon, 2 Dec 2024 16:16:13 +0000
Subject: [PATCH 11/25] Format for example and rename

---
 examples/load_and_explore_bboxes_moca.py   | 128 --------
 examples/load_and_reindex_bboxes.py        | 323 +++++++++++++++++++++
 examples/reindex_and_interpolate_bboxes.py | 220 --------------
 3 files changed, 323 insertions(+), 348 deletions(-)
 delete mode 100644 examples/load_and_explore_bboxes_moca.py
 create mode 100644 examples/load_and_reindex_bboxes.py
 delete mode 100644 examples/reindex_and_interpolate_bboxes.py

diff --git a/examples/load_and_explore_bboxes_moca.py b/examples/load_and_explore_bboxes_moca.py
deleted file mode 100644
index ac32d1df..00000000
--- a/examples/load_and_explore_bboxes_moca.py
+++ /dev/null
@@ -1,128 +0,0 @@
-"""Load and explore bboxes tracks
-===============================
-
-Load and explore an example dataset of bounding boxes tracks.
-"""
-
-# %%
-# Imports
-# -------
-# For interactive plots: install ipympl with `pip install ipympl` and uncomment
-# the following line in your notebook
-# %matplotlib widget
-# from pathlib import Path
-
-from cycler import cycler
-from matplotlib import pyplot as plt
-
-from movement import sample_data
-from movement.io import load_bboxes
-
-# %%
-# Select sample data file
-# --------------------
-file_path = sample_data.fetch_dataset_paths("VIA_single-crab_MOCA-crab-1.csv")[
-    "bboxes"
-]
-print(file_path)
-
-# %%
-# Read file as a `movement` dataset
-# ----------------------------------
-ds = load_bboxes.from_via_tracks_file(
-    str(file_path),
-    use_frame_numbers_from_file=False,
-    # ATT! extracted frames are not consecutive!
-)
-
-# print some information about the dataset
-print(ds)
-print("-----")
-print(f"Number of individuals: {ds.sizes['individuals']}")
-print(f"Number of frames: {ds.sizes['time']}")
-
-
-# %%
-# The dataset contains bounding boxes for 1 individual, tracked for
-# 35 frames, in the xy plane.
-#
-# We can also see from the printout of the dataset that it contains
-# three data arrays: ``position``, ``shape`` and ``confidence``.
-# %%
-# Plot trajectories of first shot and color by individual
-# -------------------------------------------------------
-
-fig, ax = plt.subplots(1, 1)
-
-# add color cycler to axes
-plt.rcParams["axes.prop_cycle"] = cycler(color=plt.get_cmap("tab10").colors)
-# get the list of colors in the cycle
-color_cycle = plt.rcParams["axes.prop_cycle"].by_key()["color"]
-
-
-# frame_number = 0  # ATT! extracted frames are not consecutive!
-# img = plt.imread(str(img_dir / f"{frame_number:05}.jpg"))
-
-# for id_idx, id_str in enumerate(ds["individuals"].data):
-#     # plot frame
-#     ax.imshow(img)
-
-#     past_frames = [f for f in ds.time.data if f <= frame_number]
-#     future_frames = [f for f in ds.time.data if f > frame_number]
-
-#     # plot past position of centroid in grey
-#     ax.scatter(
-#         x=ds.position.sel(
-#             individuals=id_str, time=past_frames, space="x"
-#         ).data,
-#         y=ds.position.sel(
-#             individuals=id_str, time=past_frames, space="y"
-#         ).data,
-#         s=1,
-#         color="grey",
-#     )
-
-#     # plot future trajectories of centroids in color
-#     ax.scatter(
-#         x=ds.position.sel(
-#             individuals=id_str, time=future_frames, space="x"
-#         ).data,
-#         y=ds.position.sel(
-#             individuals=id_str, time=future_frames, space="y"
-#         ).data,
-#         s=1,
-#         color=color_cycle[id_idx % len(color_cycle)],
-#     )
-
-#     # plot bbox in this frame
-#     # ATT! currently position is the top left corner of bbox
-#     # need to uncomment the line below if position loaded is centroid
-#     # (after fix)
-#     top_left_corner = (
-#         ds.position.sel(individuals=id_str, time=frame_number).data
-#         - ds.shape.sel(individuals=id_str, time=frame_number).data / 2
-#     )
-#     bbox = plt.Rectangle(
-#         xy=tuple(top_left_corner),
-#         width=ds.shape.sel(
-#             individuals=id_str, time=frame_number, space="x"
-#         ).data,
-#         height=ds.shape.sel(
-#             individuals=id_str, time=frame_number, space="y"
-#         ).data,
-#         edgecolor=color_cycle[id_idx % len(color_cycle)],
-#         facecolor="none",  # transparent fill
-#         linewidth=1.5,
-#     )
-#     ax.add_patch(bbox)
-
-
-# # ax.legend(ds["individuals"].data, bbox_to_anchor=(1.0, 1.0))
-# # ax.invert_yaxis()
-# ax.set_aspect("equal")
-# ax.set_xlabel("x (pixels)")
-# ax.set_ylabel("y (pixels)")
-# ax.set_title(f"MoCA {img_dir}, frame {frame_number}")
-# plt.show()
-
-# %%
diff --git a/examples/load_and_reindex_bboxes.py b/examples/load_and_reindex_bboxes.py
new file mode 100644
index 00000000..c1798ff5
--- /dev/null
+++ b/examples/load_and_reindex_bboxes.py
@@ -0,0 +1,323 @@
+"""Load and reindex bounding boxes tracks
+==========================================
+
+Load an example dataset of bounding boxes' tracks and reindex
+it to every frame.
+"""
+
+# %%
+# Imports
+# -------
+
+# For interactive plots: install ipympl with `pip install ipympl` and uncomment
+# the following line in your notebook
+# %matplotlib widget
+import csv
+import math
+
+import sleap_io as sio
+from matplotlib import pyplot as plt
+
+from movement import sample_data
+from movement.filtering import interpolate_over_time
+from movement.io import load_bboxes
+
+# %%
+# Load sample dataset
+# ------------------------
+# In this tutorial, we will use a sample bounding boxes dataset with
+# a single individual (a crab).
+#
+# We will also download the associated video for visualising the data later.
+
+dataset_dict = sample_data.fetch_dataset_paths(
+    "VIA_single-crab_MOCA-crab-1.csv",
+    with_video=True,  # download associated video
+)
+
+file_path = dataset_dict["bboxes"]
+print(file_path)
+
+ds = load_bboxes.from_via_tracks_file(
+    file_path, use_frame_numbers_from_file=True
+)
+
+# %%
+# The loaded dataset is made up of three data arrays:
+# ``position``, ``shape``, and ``confidence``.
+print(ds)
+
+# %%
+# We can see the coordinates in the time dimension are expressed in frames,
+# and that only 1 in 5 frames of the video are annotated, plus
+# the last frame (167).
+#
+# In the following sections of the notebook we will explore options to reindex
+# the dataset, and fill in the missing frames with reasonable values.
+print(ds.time)
+
+# %%
+# Inspect associated video
+# --------------------------------
+# The video associated to the data contains all 168 frames.
+
+video_path = dataset_dict["video"]
+
+video = sio.load_video(video_path)
+n_frames, height, width, channels = video.shape
+
+print(f"Number of frames: {n_frames}")
+print(f"Frame size: {width}x{height}")
+print(f"Number of channels: {channels}")
+
+
+# %%
+# We can plot the data over the corresponding video frames to
+# visualise the bounding boxes around the tracked crab.
+#
+# Let's focus on the first 15 frames of the video, and plot the annotated
+# bounding box and centroid at each frame. The centroid at each frame is
+# marked as a blue marker with a red ring. The past centroid positions are
+# shown in blue and the future centroid positions in white.
+#
+# Note that in this case the camera is not static relative to the scene.
+
+# select indices of data to plot
+data_start_idx = 0
+data_end_idx = 15
+
+# initialise figure
+fig = plt.figure(figsize=(15, 12))
+
+# get list of colors for plotting
+list_colors = plt.get_cmap("tab10").colors
+
+# loop over data and plot over corresponding frame
+for p_i, data_idx in enumerate(range(data_start_idx, data_end_idx)):
+    # add subplot axes
+    ax = plt.subplot(math.ceil(data_end_idx / 5), 5, p_i + 1)
+
+    # plot frame
+    # note: the video is indexed at every frame, so
+    # we use the frame number as index
+    ax.imshow(video[ds.time[data_idx].item()])
+
+    # plot box at this frame
+    top_left_corner = (
+        ds.position[data_idx, 0, :].data - ds.shape[data_idx, 0, :].data / 2
+    )
+    bbox = plt.Rectangle(
+        xy=tuple(top_left_corner),
+        width=ds.shape[data_idx, 0, 0].data,  # x coordinate of shape array
+        height=ds.shape[data_idx, 0, 1].data,  # y coordinate of shape array
+        edgecolor=list_colors[0],
+        facecolor="none",
+        linewidth=1.5,
+    )
+    ax.add_patch(bbox)
+
+    # plot box's centroid at this frame with red ring
+    ax.scatter(
+        x=ds.position[data_idx, 0, 0].data,
+        y=ds.position[data_idx, 0, 1].data,
+        s=15,
+        color=list_colors[0],
+        edgecolors="red",
+    )
+
+    # plot past centroid positions in blue
+    ax.scatter(
+        x=ds.position[:data_idx, 0, 0].data,
+        y=ds.position[:data_idx, 0, 1].data,
+        s=5,
+        color=list_colors[0],
+    )
+
+    # plot future centroid positionsin white
+    ax.scatter(
+        x=ds.position[data_idx + 1 : data_end_idx, 0, 0].data,
+        y=ds.position[data_idx + 1 : data_end_idx, 0, 1].data,
+        s=5,
+        color="white",
+    )
+
+    ax.set_title(f"Frame {ds.time[data_idx].item()}")
+    ax.set_xlabel("x (pixles)")
+    ax.set_ylabel("y (pixels)")
+    ax.set_xlabel("")
+
+fig.tight_layout()
+
+
+# %%
+# Fill in empty values with forward filling
+# ----------------------------------------------------
+# We can fill in the frames with missing values for the  ``position`` and
+# ``shape`` arrays by taking the last valid value in time. In this way, a
+# box's position and shape stay constant if for a current frame the box
+# has no annotation defined.
+
+ds_ff = ds.reindex(
+    {"time": list(range(ds.time[-1].item()))},
+    method="ffill",  # propagate last valid index value forward
+)
+
+# check the first 14 frames of the data
+print("Position data array (first 14 frames):")
+print(ds_ff.position.data[:14, 0, :])  # time, individual, space
+
+print("----")
+print("Shape data array (first 14 frames):")
+print(ds_ff.shape.data[:14, 0, :])  # time, individual, space
+
+# %%
+# Fill in empty values with NaN
+# ----------------------------------------------------
+# Alternatively, we can fill in the missing frames with NaN values.
+# This can be useful if we want to interpolate the missing values later.
+ds_nan = ds.reindex(
+    {"time": list(range(ds.time[-1].item()))},
+    method=None,  # default
+)
+
+# check the first 14 frames of the data
+print("Position data array (first 14 frames):")
+print(ds_nan.position.data[:14, 0, :])
+
+print("----")
+print("Shape data array (first 14 frames):")
+print(ds_nan.shape.data[:14, 0, :])
+
+# %%
+# Linearly interpolate NaN values
+# ----------------------------------------------------------
+# We can instead fill in the missing values in the dataset applying linear
+# interpolation to the ``position`` and ``shape`` data arrays. In this way,
+# we would be assuming that the centroid of the bounding box moves linearly
+# between the two annotated values, and its width and height change linearly
+# as well.
+#
+# We use the dataset with NaN values as an input to the
+# ``interpolate_over_time`` function.
+ds_interp = ds_nan.copy()
+
+for data_array_str in ["position", "shape"]:
+    ds_interp[data_array_str] = interpolate_over_time(
+        data=ds_interp[data_array_str],
+        method="linear",
+        max_gap=None,
+        print_report=False,
+    )
+
+# check the first 14 frames of the data
+print("Position data array (first 14 frames):")
+print(ds_interp.position.data[:14, 0, :])
+
+print("----")
+print("Shape data array (first 14 frames):")
+print(ds_interp.shape.data[:14, 0, :])
+
+
+# %%
+# Compare interpolation methods
+# ------------------------------
+# We can now qualitatively compare the three different methods of filling
+# in the missing frames, by plotting the bounding boxes
+# for the first 6 frames of the video.
+#
+# Remember only frames 0 and 5 are annotated in the original dataset. These
+# are plotted in blue, while the forward filled values are plotted in orange
+# and the linearly interpolated values in green.
+
+# initialise figure
+fig = plt.figure(figsize=(15, 12))
+
+# loop over frames
+for frame_n in range(5):
+    # add subplot axes
+    ax = plt.subplot(1, 6, frame_n + 1)
+
+    # plot frame
+    # note: the video is indexed at every frame, so
+    # we use the frame number as index
+    ax.imshow(video[frame_n])
+
+    # plot bounding box for each dataset
+    for ds_i, ds_one in enumerate([ds_nan, ds_ff, ds_interp]):
+        # plot box
+        top_left_corner = (
+            ds_one.position.sel(time=frame_n, individuals="id_1").data
+            - ds_one.shape.sel(time=frame_n, individuals="id_1").data / 2
+        )
+        bbox = plt.Rectangle(
+            xy=tuple(top_left_corner),
+            width=ds_one.shape.sel(
+                time=frame_n, individuals="id_1", space="x"
+            ).data,
+            height=ds_one.shape.sel(
+                time=frame_n, individuals="id_1", space="y"
+            ).data,
+            edgecolor=list_colors[ds_i],
+            facecolor="none",
+            # make line for NaN dataset thicker and dotted
+            linewidth=[5, 1.5, 1.5][ds_i],
+            linestyle=["dotted", "solid", "solid"][ds_i],
+            label=["nan", "ffill", "linear"][ds_i],
+        )
+        ax.add_patch(bbox)
+
+        # plot centroid
+        ax.scatter(
+            x=ds_one.position.sel(
+                time=frame_n, individuals="id_1", space="x"
+            ).data,
+            y=ds_one.position.sel(
+                time=frame_n, individuals="id_1", space="y"
+            ).data,
+            s=5,
+            color=list_colors[ds_i],
+        )
+
+    # add legend to first frame
+    if frame_n == 0:
+        ax.legend()
+    ax.set_title(f"Frame {frame_n}")
+
+fig.tight_layout()
+
+# %%
+# Export as .csv file
+# -------------------
+# Let's assume the dataset with the forward filled values is the best suited
+# for our task - we can now export the computed values to a .csv file
+#
+# Note that we currently do not provide explicit methods to export a
+# ``movement`` bounding boxes dataset in a specific format. However, we can
+# easily save the bounding boxes’ trajectories to a .csv file using the
+# standard Python library ``csv``.
+
+# define name for output csv file
+filepath = "tracking_output.csv"
+
+# open the csv file in write mode
+with open(filepath, mode="w", newline="") as file:
+    writer = csv.writer(file)
+
+    # write the header
+    writer.writerow(
+        ["frame_idx", "bbox_ID", "x", "y", "width", "height", "confidence"]
+    )
+
+    # write the data
+    for individual in ds.individuals.data:
+        for frame in ds.time.data:
+            x, y = ds.position.sel(time=frame, individuals=individual).data
+            width, height = ds.shape.sel(
+                time=frame, individuals=individual
+            ).data
+            confidence = ds.confidence.sel(
+                time=frame, individuals=individual
+            ).data
+            writer.writerow(
+                [frame, individual, x, y, width, height, confidence]
+            )
diff --git a/examples/reindex_and_interpolate_bboxes.py b/examples/reindex_and_interpolate_bboxes.py
deleted file mode 100644
index cdddba8f..00000000
--- a/examples/reindex_and_interpolate_bboxes.py
+++ /dev/null
@@ -1,220 +0,0 @@
-"""Reindex and interpolate bounding boxes tracks
-===============================
-
-Load an example dataset of bounding boxes' tracks and reindex
-it to every frame.
-"""
-
-# %%
-import math
-
-import sleap_io as sio
-from cycler import cycler
-from matplotlib import pyplot as plt
-
-from movement import sample_data
-from movement.filtering import interpolate_over_time
-from movement.io import load_bboxes
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Select sample data file
-# --------------------
-# For this example, we will use the path to one of
-# the sample datasets provided with ``movement``.
-
-dataset_dict = sample_data.fetch_dataset_paths(
-    "VIA_single-crab_MOCA-crab-1.csv",
-    with_video=True,  # for visualisation
-)
-
-file_path = dataset_dict["bboxes"]
-print(file_path)
-
-ds = load_bboxes.from_via_tracks_file(
-    file_path, use_frame_numbers_from_file=True
-)
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Only 1 in 5 frames are annotated, plus the last frame (167)
-print(ds)
-print("-----")
-print(ds.time)
-
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Extend the dataset to every frame by forward filling
-# The position and shape data arrays are filled with the last valid value
-# So position and shape are kept constant when no annotation is available
-ds_ff = ds.reindex(
-    {"time": list(range(ds.time[-1].item()))},
-    method="ffill",  # propagate last valid index value forward
-)
-
-print("Position data array (first 14 frames):")
-print(ds_ff.position.data[:14, 0, :])  # time, individual, space
-
-print("----")
-print("Shape data array (first 14 frames):")
-print(ds_ff.shape.data[:14, 0, :])  # time, individual, space
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Extend the dataset to every frame and fill empty values with nan
-ds_nan = ds.reindex(
-    {"time": list(range(ds.time[-1].item()))},
-    method=None,  # default
-)
-
-print("Position data array (first 14 frames):")
-print(ds_nan.position.data[:14, 0, :])
-
-print("----")
-print("Shape data array (first 14 frames):")
-print(ds_nan.shape.data[:14, 0, :])
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Linearly interpolate position and shape with nan
-
-ds_interp = ds_nan.copy()
-
-for data_array_str in ["position", "shape"]:
-    ds_interp[data_array_str] = interpolate_over_time(
-        data=ds_interp[data_array_str],
-        method="linear",
-        max_gap=None,
-        print_report=False,
-    )
-
-print("Position data array (first 14 frames):")
-print(ds_interp.position.data[:14, 0, :])
-
-print("----")
-print("Shape data array (first 14 frames):")
-print(ds_interp.shape.data[:14, 0, :])
-
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Inspect associated video
-
-video_path = dataset_dict["video"]
-
-
-video = sio.load_video(video_path)
-
-n_frames, height, width, channels = video.shape
-
-print(f"Number of frames: {n_frames}")  # The video contains all frames
-print(f"Frame size: {width}x{height}")
-print(f"Number of channels: {channels}")
-
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Plot data
-# OJO camera movement
-
-# select indices of data to plot
-data_start_idx = 0
-data_end_idx = 11
-
-# initialise figure
-fig = plt.figure(figsize=(15, 12))
-
-# add color cycler to axes
-plt.rcParams["axes.prop_cycle"] = cycler(color=plt.get_cmap("tab10").colors)
-color_cycle = plt.rcParams["axes.prop_cycle"].by_key()["color"]
-
-# loop over data and plot over corresponding frame
-for p_i, data_idx in enumerate(range(data_start_idx, data_end_idx)):
-    # add subplot axes
-    ax = plt.subplot(math.ceil(data_end_idx / 5), 5, p_i + 1)
-
-    # plot frame
-    ax.imshow(
-        video[ds.time[data_idx].item()]
-    )  # the video is indexed at every frame! use frame number as index
-
-    # plot annotated boxes
-    top_left_corner = (
-        ds.position[data_idx, 0, :].data - ds.shape[data_idx, 0, :].data / 2
-    )
-    bbox = plt.Rectangle(
-        xy=tuple(top_left_corner),
-        width=ds.shape[data_idx, 0, 0].data,  # x coord
-        height=ds.shape[data_idx, 0, 1].data,  # y coord of shape array
-        edgecolor=color_cycle[0],  # [data_idx % len(color_cycle)],
-        facecolor="none",  # transparent fill
-        linewidth=1.5,
-    )
-    ax.add_patch(bbox)
-
-    ax.set_title(f"Frame {ds.time[data_idx].item()}")
-
-fig.tight_layout()
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Compare interpolation methods
-
-# select frames to inspect
-frame_number_start = 0
-frame_number_end = 6
-
-# add color cycler to axes
-plt.rcParams["axes.prop_cycle"] = cycler(color=plt.get_cmap("tab10").colors)
-color_cycle = plt.rcParams["axes.prop_cycle"].by_key()["color"]
-
-
-# initialise figure
-fig = plt.figure(figsize=(15, 12))
-
-
-# loop over data and plot over corresponding frame
-for frame_n in range(frame_number_start, frame_number_end):
-    # add subplot axes
-    ax = plt.subplot(1, 6, frame_n + 1)
-
-    # plot frame
-    ax.imshow(video[frame_n])
-    # the video is indexed at every frame! use frame number as index
-
-    # plot bounding box: box and centroid
-    for ds_i, ds in enumerate([ds_nan, ds_ff, ds_interp]):
-        # plot box
-        top_left_corner = (
-            ds.position.sel(time=frame_n, individuals="id_1").data
-            - ds.shape.sel(time=frame_n, individuals="id_1").data / 2
-        )
-        bbox = plt.Rectangle(
-            xy=tuple(top_left_corner),
-            width=ds.shape.sel(
-                time=frame_n, individuals="id_1", space="x"
-            ).data,  # x coord
-            height=ds.shape.sel(
-                time=frame_n, individuals="id_1", space="y"
-            ).data,  # y coord of shape array
-            edgecolor=color_cycle[ds_i],
-            facecolor="none",  # transparent fill
-            linewidth=[4.5, 1.5, 1.5][ds_i],
-            linestyle=["dotted", "solid", "solid"][ds_i],
-            label=["nan", "ffill", "linear"][ds_i],
-        )
-        ax.add_patch(bbox)
-
-        # plot centroid
-        ax.scatter(
-            x=ds.position.sel(
-                time=frame_n, individuals="id_1", space="x"
-            ).data,
-            y=ds.position.sel(
-                time=frame_n, individuals="id_1", space="y"
-            ).data,
-            s=5,
-            color=color_cycle[ds_i],
-        )
-
-    if frame_n == 0:
-        ax.legend()
-    ax.set_title(f"Frame {frame_n}")
-
-fig.tight_layout()
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Export as csv file

From 9cd39a52acf92312bac076d26d7968dbb0457fdf Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Mon, 2 Dec 2024 16:17:36 +0000
Subject: [PATCH 12/25] Fix to input/output snippet

---
 docs/source/user_guide/input_output.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/user_guide/input_output.md b/docs/source/user_guide/input_output.md
index 6cc1f2a4..d887b98d 100644
--- a/docs/source/user_guide/input_output.md
+++ b/docs/source/user_guide/input_output.md
@@ -238,7 +238,7 @@ Here is an example of how you can save a bounding boxes dataset to a .csv file:
 
 ```python
 # define name for output csv file
-file = 'tracking_output.csv"
+filepath = "tracking_output.csv"
 
 # open the csv file in write mode
 with open(filepath, mode="w", newline="") as file:

From 49b85825e50ee13ee225d0bd8e46bee52e3bcf27 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Mon, 2 Dec 2024 17:36:31 +0000
Subject: [PATCH 13/25] Make figure sizes decent

---
 examples/load_and_reindex_bboxes.py | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/examples/load_and_reindex_bboxes.py b/examples/load_and_reindex_bboxes.py
index c1798ff5..818c70b4 100644
--- a/examples/load_and_reindex_bboxes.py
+++ b/examples/load_and_reindex_bboxes.py
@@ -14,6 +14,7 @@
 # %matplotlib widget
 import csv
 import math
+import os
 
 import sleap_io as sio
 from matplotlib import pyplot as plt
@@ -26,7 +27,8 @@
 # Load sample dataset
 # ------------------------
 # In this tutorial, we will use a sample bounding boxes dataset with
-# a single individual (a crab).
+# a single individual (a crab). The clip is part of the `Moving
+# Camouflaged Animals Dataset (MoCA) dataset <https://www.robots.ox.ac.uk/~vgg/data/MoCA/>`_.
 #
 # We will also download the associated video for visualising the data later.
 
@@ -49,11 +51,11 @@
 
 # %%
 # We can see the coordinates in the time dimension are expressed in frames,
-# and that only 1 in 5 frames of the video are annotated, plus
+# and that we only have data for 1 in 5 frames of the video, plus
 # the last frame (167).
 #
 # In the following sections of the notebook we will explore options to reindex
-# the dataset, and fill in the missing frames with reasonable values.
+# the dataset and fill in values for the frames with missing data.
 print(ds.time)
 
 # %%
@@ -87,7 +89,7 @@
 data_end_idx = 15
 
 # initialise figure
-fig = plt.figure(figsize=(15, 12))
+fig = plt.figure(figsize=(8, 20))  # width, height
 
 # get list of colors for plotting
 list_colors = plt.get_cmap("tab10").colors
@@ -95,7 +97,7 @@
 # loop over data and plot over corresponding frame
 for p_i, data_idx in enumerate(range(data_start_idx, data_end_idx)):
     # add subplot axes
-    ax = plt.subplot(math.ceil(data_end_idx / 5), 5, p_i + 1)
+    ax = plt.subplot(math.ceil(data_end_idx / 2), 2, p_i + 1)
 
     # plot frame
     # note: the video is indexed at every frame, so
@@ -230,12 +232,12 @@
 # and the linearly interpolated values in green.
 
 # initialise figure
-fig = plt.figure(figsize=(15, 12))
+fig = plt.figure(figsize=(8, 8))
 
 # loop over frames
-for frame_n in range(5):
+for frame_n in range(6):
     # add subplot axes
-    ax = plt.subplot(1, 6, frame_n + 1)
+    ax = plt.subplot(3, 2, frame_n + 1)
 
     # plot frame
     # note: the video is indexed at every frame, so
@@ -282,6 +284,8 @@
     if frame_n == 0:
         ax.legend()
     ax.set_title(f"Frame {frame_n}")
+    ax.set_xlabel("x (pixels)")
+    ax.set_ylabel("y (pixels)")
 
 fig.tight_layout()
 
@@ -321,3 +325,11 @@
             writer.writerow(
                 [frame, individual, x, y, width, height, confidence]
             )
+
+# %%
+# Remove the output file
+# ----------------------
+# We can remove the output file we have just created.
+# "nbsphinx": "hidden"
+
+os.remove(filepath)

From b241cf9711eee32764e0acdd55b08f47c0be78e6 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Mon, 2 Dec 2024 17:54:39 +0000
Subject: [PATCH 14/25] Select thumbnail

---
 examples/load_and_reindex_bboxes.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/examples/load_and_reindex_bboxes.py b/examples/load_and_reindex_bboxes.py
index 818c70b4..78a50318 100644
--- a/examples/load_and_reindex_bboxes.py
+++ b/examples/load_and_reindex_bboxes.py
@@ -231,6 +231,8 @@
 # are plotted in blue, while the forward filled values are plotted in orange
 # and the linearly interpolated values in green.
 
+# sphinx_gallery_thumbnail_number = 2
+
 # initialise figure
 fig = plt.figure(figsize=(8, 8))
 
@@ -329,7 +331,6 @@
 # %%
 # Remove the output file
 # ----------------------
-# We can remove the output file we have just created.
-# "nbsphinx": "hidden"
-
+# To remove the output file we have just created, we can run the following
+# code.
 os.remove(filepath)

From 7c5af56107eb806415df610a41c5dd5c882748d8 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 5 Dec 2024 11:04:08 +0000
Subject: [PATCH 15/25] Apply suggestions from code review

Co-authored-by: Niko Sirmpilatze <niko.sirbiladze@gmail.com>
---
 examples/load_and_reindex_bboxes.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/load_and_reindex_bboxes.py b/examples/load_and_reindex_bboxes.py
index 78a50318..e6f020e3 100644
--- a/examples/load_and_reindex_bboxes.py
+++ b/examples/load_and_reindex_bboxes.py
@@ -50,12 +50,12 @@
 print(ds)
 
 # %%
-# We can see the coordinates in the time dimension are expressed in frames,
+# We can see that coordinates in the time dimension are expressed in frames,
 # and that we only have data for 1 in 5 frames of the video, plus
 # the last frame (167).
 #
-# In the following sections of the notebook we will explore options to reindex
-# the dataset and fill in values for the frames with missing data.
+# In the following sections of the notebook we will explore options to upsample
+# the dataset by filling in values for video frames with no data.
 print(ds.time)
 
 # %%

From 47f0c36c55efb814b334fe95603e096721a83c2b Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 5 Dec 2024 11:15:40 +0000
Subject: [PATCH 16/25] Reduce number of demo frames shown at the start

---
 examples/load_and_reindex_bboxes.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/examples/load_and_reindex_bboxes.py b/examples/load_and_reindex_bboxes.py
index e6f020e3..dd928d95 100644
--- a/examples/load_and_reindex_bboxes.py
+++ b/examples/load_and_reindex_bboxes.py
@@ -77,19 +77,20 @@
 # We can plot the data over the corresponding video frames to
 # visualise the bounding boxes around the tracked crab.
 #
-# Let's focus on the first 15 frames of the video, and plot the annotated
-# bounding box and centroid at each frame. The centroid at each frame is
-# marked as a blue marker with a red ring. The past centroid positions are
-# shown in blue and the future centroid positions in white.
+# Let's inspect the first 6 frames of the video for which we have
+# annotations, and plot the annotated bounding box and centroid at each frame.
+# The centroid at each frame is marked as a blue marker with a red ring.
+# The past centroid positions are shown in blue and the future centroid
+# positions in white.
 #
 # Note that in this case the camera is not static relative to the scene.
 
 # select indices of data to plot
 data_start_idx = 0
-data_end_idx = 15
+data_end_idx = 6
 
 # initialise figure
-fig = plt.figure(figsize=(8, 20))  # width, height
+fig = plt.figure(figsize=(8, 10))  # width, height
 
 # get list of colors for plotting
 list_colors = plt.get_cmap("tab10").colors

From 9a8f74ed226aac0036879d36c5b6d7287227cdfb Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 5 Dec 2024 11:17:25 +0000
Subject: [PATCH 17/25] Rename last section

---
 examples/load_and_reindex_bboxes.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/load_and_reindex_bboxes.py b/examples/load_and_reindex_bboxes.py
index dd928d95..7ac00c2b 100644
--- a/examples/load_and_reindex_bboxes.py
+++ b/examples/load_and_reindex_bboxes.py
@@ -330,7 +330,7 @@
             )
 
 # %%
-# Remove the output file
+# Clean-up
 # ----------------------
 # To remove the output file we have just created, we can run the following
 # code.

From c449c106310d350a0d984ea473866af08098d047 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 5 Dec 2024 11:27:14 +0000
Subject: [PATCH 18/25] Change title and tagline

---
 ...oad_and_reindex_bboxes.py => load_and_upsample_bboxes.py} | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)
 rename examples/{load_and_reindex_bboxes.py => load_and_upsample_bboxes.py} (98%)

diff --git a/examples/load_and_reindex_bboxes.py b/examples/load_and_upsample_bboxes.py
similarity index 98%
rename from examples/load_and_reindex_bboxes.py
rename to examples/load_and_upsample_bboxes.py
index 7ac00c2b..ff76c846 100644
--- a/examples/load_and_reindex_bboxes.py
+++ b/examples/load_and_upsample_bboxes.py
@@ -1,8 +1,7 @@
-"""Load and reindex bounding boxes tracks
+"""Load and upsample bounding boxes tracks
 ==========================================
 
-Load an example dataset of bounding boxes' tracks and reindex
-it to every frame.
+Load bounding boxes tracks and upsample them to match the video frame rate.
 """
 
 # %%

From 1cda92e44dc79e53c256ff6c0f209cc4a3d73376 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 5 Dec 2024 12:49:38 +0000
Subject: [PATCH 19/25] Add plots to check data imputation (rather than
 printing) and make colors consistent with the rest of the notebook.

---
 examples/load_and_upsample_bboxes.py | 220 ++++++++++++++++++++-------
 1 file changed, 163 insertions(+), 57 deletions(-)

diff --git a/examples/load_and_upsample_bboxes.py b/examples/load_and_upsample_bboxes.py
index ff76c846..bdb289e6 100644
--- a/examples/load_and_upsample_bboxes.py
+++ b/examples/load_and_upsample_bboxes.py
@@ -27,7 +27,8 @@
 # ------------------------
 # In this tutorial, we will use a sample bounding boxes dataset with
 # a single individual (a crab). The clip is part of the `Moving
-# Camouflaged Animals Dataset (MoCA) dataset <https://www.robots.ox.ac.uk/~vgg/data/MoCA/>`_.
+# Camouflaged Animals Dataset (MoCA) dataset
+# <https://www.robots.ox.ac.uk/~vgg/data/MoCA/>`_.
 #
 # We will also download the associated video for visualising the data later.
 
@@ -49,13 +50,16 @@
 print(ds)
 
 # %%
-# We can see that coordinates in the time dimension are expressed in frames,
-# and that we only have data for 1 in 5 frames of the video, plus
-# the last frame (167).
+# We can see that the coordinates in the time dimension are expressed in
+# frames, and that we only have data for 1 in 5 frames of the video, plus
+# the last frame (frame number 167).
+
+print(ds.time)
+
+# %%
 #
 # In the following sections of the notebook we will explore options to upsample
-# the dataset by filling in values for video frames with no data.
-print(ds.time)
+# the dataset by filling in values for the video frames with no data.
 
 # %%
 # Inspect associated video
@@ -73,26 +77,15 @@
 
 
 # %%
-# We can plot the data over the corresponding video frames to
-# visualise the bounding boxes around the tracked crab.
-#
 # Let's inspect the first 6 frames of the video for which we have
 # annotations, and plot the annotated bounding box and centroid at each frame.
-# The centroid at each frame is marked as a blue marker with a red ring.
-# The past centroid positions are shown in blue and the future centroid
-# positions in white.
-#
-# Note that in this case the camera is not static relative to the scene.
 
 # select indices of data to plot
 data_start_idx = 0
 data_end_idx = 6
 
 # initialise figure
-fig = plt.figure(figsize=(8, 10))  # width, height
-
-# get list of colors for plotting
-list_colors = plt.get_cmap("tab10").colors
+fig = plt.figure(figsize=(8, 8))  # width, height
 
 # loop over data and plot over corresponding frame
 for p_i, data_idx in enumerate(range(data_start_idx, data_end_idx)):
@@ -112,9 +105,10 @@
         xy=tuple(top_left_corner),
         width=ds.shape[data_idx, 0, 0].data,  # x coordinate of shape array
         height=ds.shape[data_idx, 0, 1].data,  # y coordinate of shape array
-        edgecolor=list_colors[0],
+        edgecolor="red",
         facecolor="none",
         linewidth=1.5,
+        label="current frame",
     )
     ax.add_patch(bbox)
 
@@ -123,33 +117,42 @@
         x=ds.position[data_idx, 0, 0].data,
         y=ds.position[data_idx, 0, 1].data,
         s=15,
-        color=list_colors[0],
-        edgecolors="red",
+        color="red",
     )
 
     # plot past centroid positions in blue
-    ax.scatter(
-        x=ds.position[:data_idx, 0, 0].data,
-        y=ds.position[:data_idx, 0, 1].data,
-        s=5,
-        color=list_colors[0],
-    )
+    if data_idx > 0:
+        ax.scatter(
+            x=ds.position[0:data_idx, 0, 0].data,
+            y=ds.position[0:data_idx, 0, 1].data,
+            s=5,
+            color="tab:blue",
+            label="past frames",
+        )
 
-    # plot future centroid positionsin white
+    # plot future centroid positions in white
     ax.scatter(
         x=ds.position[data_idx + 1 : data_end_idx, 0, 0].data,
         y=ds.position[data_idx + 1 : data_end_idx, 0, 1].data,
         s=5,
         color="white",
+        label="future frames",
     )
 
     ax.set_title(f"Frame {ds.time[data_idx].item()}")
     ax.set_xlabel("x (pixles)")
     ax.set_ylabel("y (pixels)")
     ax.set_xlabel("")
+    if p_i == 1:
+        ax.legend()
 
 fig.tight_layout()
 
+# %%
+#
+# The centroid at each frame is marked with a red marker. The past centroid
+# positions are shown in blue and the future centroid positions in white.
+# Note that in this case the camera is not static relative to the environment.
 
 # %%
 # Fill in empty values with forward filling
@@ -164,13 +167,44 @@
     method="ffill",  # propagate last valid index value forward
 )
 
-# check the first 14 frames of the data
-print("Position data array (first 14 frames):")
-print(ds_ff.position.data[:14, 0, :])  # time, individual, space
+# %%
+# We can verify with a plot that the missing values have been filled in
+# using the last valid value in time.
+
+# In the plot below, the original position and shape data is shown in black,
+# while the forward-filled values are shown in blue.
+
+fig, axs = plt.subplots(2, 2, figsize=(8, 6))
+for row in range(axs.shape[0]):
+    space_coord = ["x", "y"][row]
+    for col in range(axs.shape[1]):
+        ax = axs[row, col]
+        data_array_str = ["position", "shape"][col]
+        # plot original data
+        ax.scatter(
+            x=ds.time,
+            y=ds[data_array_str].sel(individuals="id_1", space=space_coord),
+            marker="o",
+            color="black",
+            label="original data",
+        )
+        # plot forward filled data
+        ax.plot(
+            ds_ff.time,
+            ds_ff[data_array_str].sel(individuals="id_1", space=space_coord),
+            marker=".",
+            linewidth=1,
+            color="tab:green",
+            label="upsampled data",
+        )
+        ax.set_ylabel(f"{space_coord} (pixels)")
+        if row == 0:
+            ax.set_title(f"Bounding box {data_array_str}")
+            if col == 1:
+                ax.legend()
+        if row == 1:
+            ax.set_xlabel("time (frames)")
 
-print("----")
-print("Shape data array (first 14 frames):")
-print(ds_ff.shape.data[:14, 0, :])  # time, individual, space
 
 # %%
 # Fill in empty values with NaN
@@ -182,19 +216,54 @@
     method=None,  # default
 )
 
-# check the first 14 frames of the data
-print("Position data array (first 14 frames):")
-print(ds_nan.position.data[:14, 0, :])
+# %%
+# Like before, we can verify with a plot that the missing values have been
+# filled with NaN values.
+fig, axs = plt.subplots(2, 2, figsize=(8, 6))
+for row in range(axs.shape[0]):
+    space_coord = ["x", "y"][row]
+    for col in range(axs.shape[1]):
+        ax = axs[row, col]
+        data_array_str = ["position", "shape"][col]
+        # plot original data
+        ax.scatter(
+            x=ds.time,
+            y=ds[data_array_str].sel(individuals="id_1", space=space_coord),
+            marker="o",
+            color="black",
+            label="original data",
+        )
+        # plot NaN filled data
+        ax.plot(
+            ds_nan.time,
+            ds_nan[data_array_str].sel(individuals="id_1", space=space_coord),
+            marker=".",
+            linewidth=1,
+            color="tab:blue",
+            label="upsampled data",
+        )
+        ax.set_ylabel(f"{space_coord} (pixels)")
+        if row == 0:
+            ax.set_title(f"Bounding box {data_array_str}")
+            if col == 1:
+                ax.legend()
+        if row == 1:
+            ax.set_xlabel("time (frames)")
 
+# %%
+# We can further confirm we have NaNs where expected by printing the first few
+# frames of the data.
+print("Position data array (first 10 frames):")
+print(ds_nan.position.isel(time=slice(0, 10), individuals=0).data)
 print("----")
-print("Shape data array (first 14 frames):")
-print(ds_nan.shape.data[:14, 0, :])
+print("Shape data array (first 10 frames):")
+print(ds_nan.shape.isel(time=slice(0, 10), individuals=0).data)
 
 # %%
 # Linearly interpolate NaN values
 # ----------------------------------------------------------
-# We can instead fill in the missing values in the dataset applying linear
-# interpolation to the ``position`` and ``shape`` data arrays. In this way,
+# We can instead fill in the missing values in the dataset by linearly
+# interpolating the ``position`` and ``shape`` data arrays. In this way,
 # we would be assuming that the centroid of the bounding box moves linearly
 # between the two annotated values, and its width and height change linearly
 # as well.
@@ -211,31 +280,66 @@
         print_report=False,
     )
 
-# check the first 14 frames of the data
-print("Position data array (first 14 frames):")
-print(ds_interp.position.data[:14, 0, :])
-
-print("----")
-print("Shape data array (first 14 frames):")
-print(ds_interp.shape.data[:14, 0, :])
+# %%
+# Like before, we can visually check that the missing data has been imputed as
+# expected by plotting the x and y coordinates of the position and shape arrays
+# in time.
+
+fig, axs = plt.subplots(2, 2, figsize=(8, 6))
+for row in range(axs.shape[0]):
+    space_coord = ["x", "y"][row]
+    for col in range(axs.shape[1]):
+        ax = axs[row, col]
+        data_array_str = ["position", "shape"][col]
+        # plot original data
+        ax.scatter(
+            x=ds.time,
+            y=ds[data_array_str].sel(individuals="id_1", space=space_coord),
+            marker="o",
+            color="black",
+            label="original data",
+        )
+        # plot linearly interpolated data
+        ax.plot(
+            ds_interp.time,
+            ds_interp[data_array_str].sel(
+                individuals="id_1", space=space_coord
+            ),
+            marker=".",
+            linewidth=1,
+            color="tab:orange",
+            label="upsampled data",
+        )
+        ax.set_ylabel(f"{space_coord} (pixels)")
+        if row == 0:
+            ax.set_title(f"Bounding box {data_array_str}")
+            if col == 1:
+                ax.legend()
+        if row == 1:
+            ax.set_xlabel("time (frames)")
 
+# %%
+# The plot above shows that between the original data points (in black),
+# the data is assumed to evolve linearly (in blue).
 
 # %%
-# Compare interpolation methods
-# ------------------------------
+# Compare methods
+# ----------------
 # We can now qualitatively compare the three different methods of filling
 # in the missing frames, by plotting the bounding boxes
-# for the first 6 frames of the video.
+# for the first few frames of the video.
 #
-# Remember only frames 0 and 5 are annotated in the original dataset. These
-# are plotted in blue, while the forward filled values are plotted in orange
-# and the linearly interpolated values in green.
+# Remember that not all frames of the video are annotated in the original
+# dataset. The original data are plotted in black, while the forward filled
+# values are plotted in orange and the linearly interpolated values in green.
 
-# sphinx_gallery_thumbnail_number = 2
+# sphinx_gallery_thumbnail_number = 4
 
 # initialise figure
 fig = plt.figure(figsize=(8, 8))
 
+list_colors = ["tab:blue", "tab:green", "tab:orange"]
+
 # loop over frames
 for frame_n in range(6):
     # add subplot axes
@@ -247,7 +351,9 @@
     ax.imshow(video[frame_n])
 
     # plot bounding box for each dataset
-    for ds_i, ds_one in enumerate([ds_nan, ds_ff, ds_interp]):
+    for ds_i, ds_one in enumerate(
+        [ds_nan, ds_ff, ds_interp]
+    ):  # blue, green , orange
         # plot box
         top_left_corner = (
             ds_one.position.sel(time=frame_n, individuals="id_1").data
@@ -297,7 +403,7 @@
 # Let's assume the dataset with the forward filled values is the best suited
 # for our task - we can now export the computed values to a .csv file
 #
-# Note that we currently do not provide explicit methods to export a
+# Note that currently we do not provide explicit methods to export a
 # ``movement`` bounding boxes dataset in a specific format. However, we can
 # easily save the bounding boxes’ trajectories to a .csv file using the
 # standard Python library ``csv``.

From 8f34dbcdb973397c3c132a89cdfaf8e4cfb4e629 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 5 Dec 2024 13:05:56 +0000
Subject: [PATCH 20/25] Change numpy-style indexing to sel when looping thru
 video frames in first plot

---
 examples/load_and_upsample_bboxes.py | 47 +++++++++++++++-------------
 1 file changed, 25 insertions(+), 22 deletions(-)

diff --git a/examples/load_and_upsample_bboxes.py b/examples/load_and_upsample_bboxes.py
index bdb289e6..ce5d8496 100644
--- a/examples/load_and_upsample_bboxes.py
+++ b/examples/load_and_upsample_bboxes.py
@@ -80,31 +80,32 @@
 # Let's inspect the first 6 frames of the video for which we have
 # annotations, and plot the annotated bounding box and centroid at each frame.
 
-# select indices of data to plot
-data_start_idx = 0
-data_end_idx = 6
+# set last frame to plot
+end_frame_idx = 25
+# create list of frames to loop over with step=5
+list_frames = list(range(0, end_frame_idx + 1, 5))
 
 # initialise figure
 fig = plt.figure(figsize=(8, 8))  # width, height
 
-# loop over data and plot over corresponding frame
-for p_i, data_idx in enumerate(range(data_start_idx, data_end_idx)):
+# loop over selected frames and plot the data
+for i, frame_idx in enumerate(list_frames):
     # add subplot axes
-    ax = plt.subplot(math.ceil(data_end_idx / 2), 2, p_i + 1)
+    ax = plt.subplot(math.ceil(len(list_frames) / 2), 2, i + 1)
 
     # plot frame
-    # note: the video is indexed at every frame, so
-    # we use the frame number as index
-    ax.imshow(video[ds.time[data_idx].item()])
+    ax.imshow(video[frame_idx])
 
     # plot box at this frame
     top_left_corner = (
-        ds.position[data_idx, 0, :].data - ds.shape[data_idx, 0, :].data / 2
-    )
+        ds.position.sel(time=frame_idx).data
+        - ds.shape.sel(time=frame_idx).data / 2
+    ).squeeze()
+
     bbox = plt.Rectangle(
         xy=tuple(top_left_corner),
-        width=ds.shape[data_idx, 0, 0].data,  # x coordinate of shape array
-        height=ds.shape[data_idx, 0, 1].data,  # y coordinate of shape array
+        width=ds.shape.sel(time=frame_idx, space="x").item(),
+        height=ds.shape.sel(time=frame_idx, space="y").item(),
         edgecolor="red",
         facecolor="none",
         linewidth=1.5,
@@ -114,17 +115,17 @@
 
     # plot box's centroid at this frame with red ring
     ax.scatter(
-        x=ds.position[data_idx, 0, 0].data,
-        y=ds.position[data_idx, 0, 1].data,
+        x=ds.position.sel(time=frame_idx, space="x"),
+        y=ds.position.sel(time=frame_idx, space="y"),
         s=15,
         color="red",
     )
 
     # plot past centroid positions in blue
-    if data_idx > 0:
+    if frame_idx > 0:
         ax.scatter(
-            x=ds.position[0:data_idx, 0, 0].data,
-            y=ds.position[0:data_idx, 0, 1].data,
+            x=ds.position.sel(time=slice(0, frame_idx - 1), space="x"),
+            y=ds.position.sel(time=slice(0, frame_idx - 1), space="y"),
             s=5,
             color="tab:blue",
             label="past frames",
@@ -132,23 +133,25 @@
 
     # plot future centroid positions in white
     ax.scatter(
-        x=ds.position[data_idx + 1 : data_end_idx, 0, 0].data,
-        y=ds.position[data_idx + 1 : data_end_idx, 0, 1].data,
+        x=ds.position.sel(time=slice(frame_idx + 1, end_frame_idx), space="x"),
+        y=ds.position.sel(time=slice(frame_idx + 1, end_frame_idx), space="y"),
         s=5,
         color="white",
         label="future frames",
     )
 
-    ax.set_title(f"Frame {ds.time[data_idx].item()}")
+    ax.set_title(f"Frame {frame_idx}")
     ax.set_xlabel("x (pixles)")
     ax.set_ylabel("y (pixels)")
     ax.set_xlabel("")
-    if p_i == 1:
+    if i == 1:  # 2nd subplot; frame_idx steps by 5 so it is never 1
         ax.legend()
 
 fig.tight_layout()
 
 # %%
+# We used ``xarray``'s ``.sel()`` method to select the data for the
+# relevant frames directly.
 #
 # The centroid at each frame is marked with a red marker. The past centroid
 # positions are shown in blue and the future centroid positions in white.

From 3bce7901a9d08edaa41ad50b0f5cd1b0ea2d7b46 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 5 Dec 2024 16:49:09 +0000
Subject: [PATCH 21/25] Replace empty by missing

---
 examples/load_and_upsample_bboxes.py | 262 +++++++++++----------------
 1 file changed, 105 insertions(+), 157 deletions(-)

diff --git a/examples/load_and_upsample_bboxes.py b/examples/load_and_upsample_bboxes.py
index ce5d8496..42971959 100644
--- a/examples/load_and_upsample_bboxes.py
+++ b/examples/load_and_upsample_bboxes.py
@@ -57,7 +57,6 @@
 print(ds.time)
 
 # %%
-#
 # In the following sections of the notebook we will explore options to upsample
 # the dataset by filling in values for the video frames with no data.
 
@@ -80,6 +79,8 @@
 # Let's inspect the first 6 frames of the video for which we have
 # annotations, and plot the annotated bounding box and centroid at each frame.
 
+# sphinx_gallery_thumbnail_number = 1
+
 # set last frame to plot
 end_frame_idx = 25
 # create list of frames to loop over with step=5
@@ -126,7 +127,7 @@
         ax.scatter(
             x=ds.position.sel(time=slice(0, frame_idx - 1), space="x"),
             y=ds.position.sel(time=slice(0, frame_idx - 1), space="y"),
-            s=5,
+            s=10,
             color="tab:blue",
             label="past frames",
         )
@@ -135,11 +136,12 @@
     ax.scatter(
         x=ds.position.sel(time=slice(frame_idx + 1, end_frame_idx), space="x"),
         y=ds.position.sel(time=slice(frame_idx + 1, end_frame_idx), space="y"),
-        s=5,
+        s=10,
         color="white",
         label="future frames",
     )
 
+    # set title and labels
     ax.set_title(f"Frame {frame_idx}")
     ax.set_xlabel("x (pixles)")
     ax.set_ylabel("y (pixels)")
@@ -160,7 +162,7 @@
 # %%
 # Fill in empty values with forward filling
 # ----------------------------------------------------
-# We can fill in the frames with missing values for the  ``position`` and
+# We can fill in the frames with empty values for the  ``position`` and
 # ``shape`` arrays by taking the last valid value in time. In this way, a
 # box's position and shape stay constant if for a current frame the box
 # has no annotation defined.
@@ -171,87 +173,81 @@
 )
 
 # %%
-# We can verify with a plot that the missing values have been filled in
+# We can verify with a plot that the empty values have been filled in
 # using the last valid value in time.
 
-# In the plot below, the original position and shape data is shown in black,
-# while the forward-filled values are shown in blue.
+# %%
+# In the plot below, the original ``position`` and ``shape`` data is shown
+# in black, while the forward-filled values are shown in green.
+
+
+# We define a convenience function to plot the ``position`` and ``shape``
+# space coordinates for the input dataset and a filled one.
+def plot_position_and_shape_xy_coords(ds_input_data, ds_filled, color_filled):
+    """Compare the x and y coordinates of the position and shape arrays in time
+    for the input and filled datasets.
+    """
+    fig, axs = plt.subplots(2, 2, figsize=(8, 6))
+    for row in range(axs.shape[0]):
+        space_coord = ["x", "y"][row]
+        for col in range(axs.shape[1]):
+            ax = axs[row, col]
+            data_array_str = ["position", "shape"][col]
+
+            # plot original data
+            ax.scatter(
+                x=ds_input_data.time,
+                y=ds_input_data[data_array_str].sel(
+                    individuals="id_1", space=space_coord
+                ),
+                marker="o",
+                color="black",
+                label="original data",
+            )
 
-fig, axs = plt.subplots(2, 2, figsize=(8, 6))
-for row in range(axs.shape[0]):
-    space_coord = ["x", "y"][row]
-    for col in range(axs.shape[1]):
-        ax = axs[row, col]
-        data_array_str = ["position", "shape"][col]
-        # plot original data
-        ax.scatter(
-            x=ds.time,
-            y=ds[data_array_str].sel(individuals="id_1", space=space_coord),
-            marker="o",
-            color="black",
-            label="original data",
-        )
-        # plot forward filled data
-        ax.plot(
-            ds_ff.time,
-            ds_ff[data_array_str].sel(individuals="id_1", space=space_coord),
-            marker=".",
-            linewidth=1,
-            color="tab:green",
-            label="upsampled data",
-        )
-        ax.set_ylabel(f"{space_coord} (pixels)")
-        if row == 0:
-            ax.set_title(f"Bounding box {data_array_str}")
-            if col == 1:
-                ax.legend()
-        if row == 1:
-            ax.set_xlabel("time (frames)")
+            # plot filled data
+            ax.plot(
+                ds_filled.time,
+                ds_filled[data_array_str].sel(
+                    individuals="id_1", space=space_coord
+                ),
+                marker=".",
+                linewidth=1,
+                color=color_filled,
+                label="upsampled data",
+            )
+
+            # set axes labels and legend
+            ax.set_ylabel(f"{space_coord} (pixels)")
+            if row == 0:
+                ax.set_title(f"Bounding box {data_array_str}")
+                if col == 1:
+                    ax.legend()
+            if row == 1:
+                ax.set_xlabel("time (frames)")
 
 
+# plot
+plot_position_and_shape_xy_coords(
+    ds, ds_filled=ds_ff, color_filled="tab:green"
+)
+
 # %%
 # Fill in empty values with NaN
 # ----------------------------------------------------
-# Alternatively, we can fill in the missing frames with NaN values.
-# This can be useful if we want to interpolate the missing values later.
+# Alternatively, we can fill in the empty frames with NaN values.
+# This can be useful if we want to interpolate later.
 ds_nan = ds.reindex(
     {"time": list(range(ds.time[-1].item()))},
     method=None,  # default
 )
 
 # %%
-# Like before, we can verify with a plot that the missing values have been
+# Like before, we can verify with a plot that the empty values have been
 # filled with NaN values.
-fig, axs = plt.subplots(2, 2, figsize=(8, 6))
-for row in range(axs.shape[0]):
-    space_coord = ["x", "y"][row]
-    for col in range(axs.shape[1]):
-        ax = axs[row, col]
-        data_array_str = ["position", "shape"][col]
-        # plot original data
-        ax.scatter(
-            x=ds.time,
-            y=ds[data_array_str].sel(individuals="id_1", space=space_coord),
-            marker="o",
-            color="black",
-            label="original data",
-        )
-        # plot NaN filled data
-        ax.plot(
-            ds_nan.time,
-            ds_nan[data_array_str].sel(individuals="id_1", space=space_coord),
-            marker=".",
-            linewidth=1,
-            color="tab:blue",
-            label="upsampled data",
-        )
-        ax.set_ylabel(f"{space_coord} (pixels)")
-        if row == 0:
-            ax.set_title(f"Bounding box {data_array_str}")
-            if col == 1:
-                ax.legend()
-        if row == 1:
-            ax.set_xlabel("time (frames)")
+plot_position_and_shape_xy_coords(
+    ds, ds_filled=ds_nan, color_filled="tab:blue"
+)
 
 # %%
 # We can further confirm we have NaNs where expected by printing the first few
@@ -265,7 +261,7 @@
 # %%
 # Linearly interpolate NaN values
 # ----------------------------------------------------------
-# We can instead fill in the missing values in the dataset by linearly
+# We can instead fill in the empty values in the dataset by linearly
 # interpolating the ``position`` and ``shape`` data arrays. In this way,
 # we would be assuming that the centroid of the bounding box moves linearly
 # between the two annotated values, and its width and height change linearly
@@ -284,59 +280,29 @@
     )
 
 # %%
-# Like before, we can visually check that the missing data has been imputed as
-# expected by plotting the x and y coordinates of the position and shape arrays
+# Like before, we can visually check that the empty data has been imputed as
+# expected by plotting the x and y coordinates of the ``position``
+# and ``shape`` arrays
 # in time.
 
-fig, axs = plt.subplots(2, 2, figsize=(8, 6))
-for row in range(axs.shape[0]):
-    space_coord = ["x", "y"][row]
-    for col in range(axs.shape[1]):
-        ax = axs[row, col]
-        data_array_str = ["position", "shape"][col]
-        # plot original data
-        ax.scatter(
-            x=ds.time,
-            y=ds[data_array_str].sel(individuals="id_1", space=space_coord),
-            marker="o",
-            color="black",
-            label="original data",
-        )
-        # plot linearly interpolated data
-        ax.plot(
-            ds_interp.time,
-            ds_interp[data_array_str].sel(
-                individuals="id_1", space=space_coord
-            ),
-            marker=".",
-            linewidth=1,
-            color="tab:orange",
-            label="upsampled data",
-        )
-        ax.set_ylabel(f"{space_coord} (pixels)")
-        if row == 0:
-            ax.set_title(f"Bounding box {data_array_str}")
-            if col == 1:
-                ax.legend()
-        if row == 1:
-            ax.set_xlabel("time (frames)")
+plot_position_and_shape_xy_coords(
+    ds, ds_filled=ds_interp, color_filled="tab:orange"
+)
 
 # %%
 # The plot above shows that between the original data points (in black),
-# the data is assumed to evolve linearly (in blue).
+# the data is assumed to evolve linearly (in orange).
 
 # %%
 # Compare methods
 # ----------------
-# We can now qualitatively compare the three different methods of filling
-# in the missing frames, by plotting the bounding boxes
-# for the first few frames of the video.
+# We can now qualitatively compare the bounding boxes computed
+# with the three different filling methods we have seen: forward filling,
+# NaN filling and linear interpolation.
 #
-# Remember that not all frames of the video are annotated in the original
-# dataset. The original data are plotted in black, while the forward filled
-# values are plotted in orange and the linearly interpolated values in green.
-
-# sphinx_gallery_thumbnail_number = 4
+# In the plot below, the NaN-filled data are plotted in blue, the forward
+# filled values are plotted in green, and the linearly interpolated values
+# are shown in orange.
 
 # initialise figure
 fig = plt.figure(figsize=(8, 8))
@@ -344,59 +310,48 @@
 list_colors = ["tab:blue", "tab:green", "tab:orange"]
 
 # loop over frames
-for frame_n in range(6):
+for frame_idx in range(6):
     # add subplot axes
-    ax = plt.subplot(3, 2, frame_n + 1)
+    ax = plt.subplot(3, 2, frame_idx + 1)
 
     # plot frame
-    # note: the video is indexed at every frame, so
-    # we use the frame number as index
-    ax.imshow(video[frame_n])
+    ax.imshow(video[frame_idx])
 
     # plot bounding box for each dataset
-    for ds_i, ds_one in enumerate(
-        [ds_nan, ds_ff, ds_interp]
-    ):  # blue, green , orange
+    for ds_i, ds_filled in enumerate([ds_nan, ds_ff, ds_interp]):
         # plot box
         top_left_corner = (
-            ds_one.position.sel(time=frame_n, individuals="id_1").data
-            - ds_one.shape.sel(time=frame_n, individuals="id_1").data / 2
-        )
+            ds_filled.position.sel(time=frame_idx).data
+            - ds_filled.shape.sel(time=frame_idx).data / 2
+        ).squeeze()
+
         bbox = plt.Rectangle(
             xy=tuple(top_left_corner),
-            width=ds_one.shape.sel(
-                time=frame_n, individuals="id_1", space="x"
-            ).data,
-            height=ds_one.shape.sel(
-                time=frame_n, individuals="id_1", space="y"
-            ).data,
+            width=ds_filled.shape.sel(time=frame_idx, space="x").item(),
+            height=ds_filled.shape.sel(time=frame_idx, space="y").item(),
             edgecolor=list_colors[ds_i],
             facecolor="none",
             # make line for NaN dataset thicker and dotted
-            linewidth=[5, 1.5, 1.5][ds_i],
-            linestyle=["dotted", "solid", "solid"][ds_i],
             label=["nan", "ffill", "linear"][ds_i],
+            linewidth=[8, 2.5, 2.5][ds_i],
+            linestyle=["dotted", "solid", "solid"][ds_i],
         )
         ax.add_patch(bbox)
 
         # plot centroid
         ax.scatter(
-            x=ds_one.position.sel(
-                time=frame_n, individuals="id_1", space="x"
-            ).data,
-            y=ds_one.position.sel(
-                time=frame_n, individuals="id_1", space="y"
-            ).data,
-            s=5,
+            x=ds_filled.position.sel(time=frame_idx, space="x"),
+            y=ds_filled.position.sel(time=frame_idx, space="y"),
+            s=20,
             color=list_colors[ds_i],
         )
 
-    # add legend to first frame
-    if frame_n == 0:
-        ax.legend()
-    ax.set_title(f"Frame {frame_n}")
+    # set title and labels
+    ax.set_title(f"Frame {frame_idx}")
     ax.set_xlabel("x (pixels)")
     ax.set_ylabel("y (pixels)")
+    if frame_idx == 0:
+        ax.legend()
 
 fig.tight_layout()
 
@@ -419,23 +374,16 @@
     writer = csv.writer(file)
 
     # write the header
-    writer.writerow(
-        ["frame_idx", "bbox_ID", "x", "y", "width", "height", "confidence"]
-    )
+    writer.writerow(["frame", "ID", "x", "y", "width", "height"])
 
     # write the data
-    for individual in ds.individuals.data:
-        for frame in ds.time.data:
-            x, y = ds.position.sel(time=frame, individuals=individual).data
-            width, height = ds.shape.sel(
-                time=frame, individuals=individual
-            ).data
-            confidence = ds.confidence.sel(
+    for individual in ds_ff.individuals.data:
+        for frame in ds_ff.time.data:
+            x, y = ds_ff.position.sel(time=frame, individuals=individual).data
+            width, height = ds_ff.shape.sel(
                 time=frame, individuals=individual
             ).data
-            writer.writerow(
-                [frame, individual, x, y, width, height, confidence]
-            )
+            writer.writerow([frame, individual, x, y, width, height])
 
 # %%
 # Clean-up

From 4cc2d511e85409caff974ac8546932a8fd1cc165 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 5 Dec 2024 16:54:20 +0000
Subject: [PATCH 22/25] Fix text cell

---
 examples/load_and_upsample_bboxes.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/examples/load_and_upsample_bboxes.py b/examples/load_and_upsample_bboxes.py
index 42971959..575a6b34 100644
--- a/examples/load_and_upsample_bboxes.py
+++ b/examples/load_and_upsample_bboxes.py
@@ -175,14 +175,12 @@
 # %%
 # We can verify with a plot that the empty values have been filled in
 # using the last valid value in time.
-
-# %%
-# In the plot below, the original ``position`` and ``shape`` data is shown
-# in black, while the forward-filled values are shown in green.
+#
+# For this we define a convenience function to plot the x and y coordinates
+# of the ``position`` and  ``shape`` arrays, for the input dataset and for
+# a filled one.
 
 
-# We define a convenience function to plot the ``position`` and ``shape``
-# space coordinates for the input dataset and a filled one.
 def plot_position_and_shape_xy_coords(ds_input_data, ds_filled, color_filled):
     """Compare the x and y coordinates of the position and shape arrays in time
     for the input and filled datasets.
@@ -227,7 +225,11 @@ def plot_position_and_shape_xy_coords(ds_input_data, ds_filled, color_filled):
                 ax.set_xlabel("time (frames)")
 
 
-# plot
+# %%
+# In the plot below, the original ``position`` and ``shape`` data is shown
+# in black, while the forward-filled values are shown in green.
+
+
 plot_position_and_shape_xy_coords(
     ds, ds_filled=ds_ff, color_filled="tab:green"
 )

From e5c2ca37fc6f83c62caa8fb5254c485afe82f540 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 5 Dec 2024 16:56:06 +0000
Subject: [PATCH 23/25] Fix one sonarcloud issue

---
 examples/load_and_upsample_bboxes.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/load_and_upsample_bboxes.py b/examples/load_and_upsample_bboxes.py
index 575a6b34..022a0f52 100644
--- a/examples/load_and_upsample_bboxes.py
+++ b/examples/load_and_upsample_bboxes.py
@@ -185,7 +185,7 @@ def plot_position_and_shape_xy_coords(ds_input_data, ds_filled, color_filled):
     """Compare the x and y coordinates of the position and shape arrays in time
     for the input and filled datasets.
     """
-    fig, axs = plt.subplots(2, 2, figsize=(8, 6))
+    _, axs = plt.subplots(2, 2, figsize=(8, 6))
     for row in range(axs.shape[0]):
         space_coord = ["x", "y"][row]
         for col in range(axs.shape[1]):

From 6e2cf1fc511c9a4e5d34837ea7d1cbf2fe149834 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Fri, 6 Dec 2024 11:44:48 +0000
Subject: [PATCH 24/25] Add tight_layout

---
 examples/load_and_upsample_bboxes.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/examples/load_and_upsample_bboxes.py b/examples/load_and_upsample_bboxes.py
index 022a0f52..7b312294 100644
--- a/examples/load_and_upsample_bboxes.py
+++ b/examples/load_and_upsample_bboxes.py
@@ -185,7 +185,7 @@ def plot_position_and_shape_xy_coords(ds_input_data, ds_filled, color_filled):
     """Compare the x and y coordinates of the position and shape arrays in time
     for the input and filled datasets.
     """
-    _, axs = plt.subplots(2, 2, figsize=(8, 6))
+    fig, axs = plt.subplots(2, 2, figsize=(8, 6))
     for row in range(axs.shape[0]):
         space_coord = ["x", "y"][row]
         for col in range(axs.shape[1]):
@@ -224,6 +224,8 @@ def plot_position_and_shape_xy_coords(ds_input_data, ds_filled, color_filled):
             if row == 1:
                 ax.set_xlabel("time (frames)")
 
+    fig.tight_layout()
+
 
 # %%
 # In the plot below, the original ``position`` and ``shape`` data is shown

From 9584634f1a029cbae215cef1719b1c3994d23f35 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Fri, 6 Dec 2024 12:01:43 +0000
Subject: [PATCH 25/25] Fix missing last timestep

---
 examples/load_and_upsample_bboxes.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/load_and_upsample_bboxes.py b/examples/load_and_upsample_bboxes.py
index 7b312294..cfb1bba3 100644
--- a/examples/load_and_upsample_bboxes.py
+++ b/examples/load_and_upsample_bboxes.py
@@ -168,7 +168,7 @@
 # has no annotation defined.
 
 ds_ff = ds.reindex(
-    {"time": list(range(ds.time[-1].item()))},
+    {"time": list(range(ds.time[-1].item() + 1))},
     method="ffill",  # propagate last valid index value forward
 )
 
@@ -242,7 +242,7 @@ def plot_position_and_shape_xy_coords(ds_input_data, ds_filled, color_filled):
 # Alternatively, we can fill in the empty frames with NaN values.
 # This can be useful if we want to interpolate later.
 ds_nan = ds.reindex(
-    {"time": list(range(ds.time[-1].item()))},
+    {"time": list(range(ds.time[-1].item() + 1))},
     method=None,  # default
 )