From 8c3997c9c3d57d524b18d1a46c9a0c3f638ea87a Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Tue, 10 Sep 2024 11:10:37 +0100 Subject: [PATCH 01/25] exploring predictions vs ground-truth --- examples/load_and_explore_bboxes.py | 233 ++++++++++++++++++++++++++++ 1 file changed, 233 insertions(+) create mode 100644 examples/load_and_explore_bboxes.py diff --git a/examples/load_and_explore_bboxes.py b/examples/load_and_explore_bboxes.py new file mode 100644 index 00000000..000dc561 --- /dev/null +++ b/examples/load_and_explore_bboxes.py @@ -0,0 +1,233 @@ +"""Inspect crab trajectories""" + +# %% +import ast +import itertools +from pathlib import Path + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from cycler import cycler + +from movement.io import load_bboxes + +# %matplotlib widget + +# %%%%%%%%%%%%%%%%%%%%% +# input data +file_csv = ( + "/Users/sofia/arc/project_Zoo_crabs/escape_clips/" + "crabs_track_output_selected_clips/04.09.2023-04-Right_RE_test/predicted_tracks.csv" +) + + +# load ground truth! +groundtruth_csv = ( + "/Users/sofia/arc/project_Zoo_crabs/escape_clips/" + "04.09.2023-04-Right_RE_test_corrected_ST_csv_SM.csv" +) + +# %%%%%%%%%%%%%%%%%%%%%%%%% +# Fix ground truth file +df = pd.read_csv(groundtruth_csv, sep=",", header=0) + +# find duplicates +list_unique_filenames = list(set(df.filename)) +filenames_to_rep_ID = {} +for file in list_unique_filenames: + df_one_filename = df.loc[df["filename"] == file] + + list_track_ids_one_filename = [ + int(ast.literal_eval(row.region_attributes)["track"]) + for row in df_one_filename.itertuples() + ] + + if len(set(list_track_ids_one_filename)) != len( + list_track_ids_one_filename + ): + # [ + # list_track_ids_one_filename.remove(k) + # for k in set(list_track_ids_one_filename) + # ] # there could be more than one duplicate!!! 
+ for k in set(list_track_ids_one_filename): + list_track_ids_one_filename.remove(k) # remove first occurrence + + filenames_to_rep_ID[file] = list_track_ids_one_filename + +# delete duplicate rows +for file, list_rep_ID in filenames_to_rep_ID.items(): + for rep_ID in list_rep_ID: + # find repeated rows for selected file and rep_ID + matching_rows = df[ + (df["filename"] == file) + & (df["region_attributes"] == f'{{"track":"{rep_ID}"}}') + ] + + # Identify the index of the first matching row + if not matching_rows.empty: + indices_to_drop = matching_rows.index[1:] + + # Drop all but the first matching row + df = df.drop(indices_to_drop) + +# save to csv +groundtruth_csv_corrected = Path(groundtruth_csv).parent / Path( + Path(groundtruth_csv).stem + "_corrected.csv" +) +df.to_csv(groundtruth_csv_corrected, index=False) + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Read corrected ground truth as movement dataset +ds_gt = load_bboxes.from_via_tracks_file( + groundtruth_csv_corrected, fps=None, use_frame_numbers_from_file=False +) +print(ds_gt) + +# Print summary +print(f"{ds_gt.source_file}") +print(f"Number of frames: {ds_gt.sizes['time']}") +print(f"Number of individuals: {ds_gt.sizes['individuals']}") + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Read predictions as movement dataset +ds_pred = load_bboxes.from_via_tracks_file( + file_csv, fps=None, use_frame_numbers_from_file=False +) +print(ds_pred) + +# Print summary +print(f"{ds_pred.source_file}") +print(f"Number of frames: {ds_pred.sizes['time']}") +print(f"Number of individuals: {ds_pred.sizes['individuals']}") + + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Check when individuals are labelled +# check x and y coordinates are nan at the same locations +# TODO: change colormap to white and blue +assert ( + np.isnan(ds_gt.position.data[:, :, 0]) + == np.isnan(ds_gt.position.data[:, :, 1]) +).all() + +fig, axs = plt.subplots(2, 1, sharex=True) + 
+axs[0].matshow(np.isnan(ds_gt.position.data[:, :, 0]).T, aspect="auto") +axs[0].set_title("Ground truth") +axs[0].set_xlabel("time (frames)") +axs[0].set_ylabel("individual") + +axs[1].matshow(np.isnan(ds_pred.position.data[:, :, 0]).T, aspect="auto") +axs[1].set_title("Prediction") +axs[1].set_xlabel("time (frames)") +axs[1].set_ylabel("tracks") +axs[1].xaxis.tick_bottom() + +# # add reference +# axs[1].hlines( +# y=ds_gt.sizes["individuals"], +# xmin=0, +# xmax=ds_gt.sizes["time"] - 1, +# color="red", +# ) + +fig.subplots_adjust(hspace=0.6, wspace=0.5) + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Compare lengths of continuous tracks & plot distrib + +# for each individual, find the length of chunks between nans +map_individuals_to_chunks = {} +for individual in range(ds_gt.sizes["individuals"]): + # find nans in x-coord for that individual + nan_idx = np.isnan(ds_gt.position.data[:, individual, 0]) + + # find lengths of continuous tracks + len_chunks = [ + len(list(group_iter)) + for key, group_iter in itertools.groupby(nan_idx) + if not key + ] + + map_individuals_to_chunks[individual] = len_chunks + +# %% +fig, ax = plt.subplots(1, 1) +for ind, list_chunks in map_individuals_to_chunks.items(): + ax.scatter([ind] * len(list_chunks), list_chunks) + + +# [sum(1 for _ in input) for _, input in itertools.groupby(_)] + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Check confidence of detections +confidence_values = ds_pred.confidence.data.flatten() +nan_median_confidence = np.nanmedian(confidence_values) + + +fig, ax = plt.subplots(1, 1) +hist = ax.hist(confidence_values, bins=np.arange(0, 1.01, 0.05)) +ax.vlines(x=nan_median_confidence, ymin=0, ymax=max(hist[0]), color="red") +ax.set_aspect("auto") + +fig, ax = plt.subplots(1, 1) +ax.hist(ds_pred.confidence.data.flatten(), bins=np.arange(0.6, 1.01, 0.01)) +ax.vlines(x=nan_median_confidence, ymin=0, ymax=max(hist[0]), color="red") +ax.set_aspect("auto") + +print(f"Median confidence: {nan_median_confidence}") + +# %% 
+# plot all trajectories +# ds.position ---> time, individuals, space +# why noise? remove low predictions? + +for ds, title in zip( + [ds_gt, ds_pred], ["Ground truth", "Prediction"], strict=False +): + # cmap = plt.get_cmap('tab10') + fig, ax = plt.subplots(1, 1) + plt.rcParams["axes.prop_cycle"] = cycler( + color=plt.get_cmap("tab10").colors + ) + + for ind_idx in range(ds.sizes["individuals"]): + ax.scatter( + x=ds.position[:, ind_idx, 0], # nframes, nindividuals, x + y=ds.position[:, ind_idx, 1], + s=1, + # c=cmap(ind_idx), + ) + ax.set_aspect("equal") + ax.set_ylim(-150, 2500) + ax.set_xlabel("x (pixels)") + ax.set_ylabel("y (pixels)") + ax.set_title(title) + plt.show() + +# %% +# first 10 individuals +fig, ax = plt.subplots(1, 1) + +ax.scatter(x=ds_pred.position[:, :10, 0], y=ds_pred.position[:, :10, 1], s=1) +ax.set_aspect("equal") +ax.set_xlabel("x (pixels)") +ax.set_ylabel("y (pixels)") +# %% +# groupby +# It generates a break or new group every time the value of the key function +# changes +# input = ( +# np.isnan(ds_gt.position.data[:,0,0]*ds_gt.position.data[:,0,1] +# ).astype(int)) +input = [0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1] +len_per_chunk = [ + (key, len(list(group_iter))) + for key, group_iter in itertools.groupby(input) +] +len_per_chunk_with_1 = [ + len(list(group_iter)) + for key, group_iter in itertools.groupby(input) + if key == 1 +] From d956aae6fd6ab3e75a56bace5b48ab4cec307a37 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 12 Sep 2024 11:42:08 +0100 Subject: [PATCH 02/25] Draft --- examples/load_and_explore_bboxes.py | 283 +++++++--------------------- 1 file changed, 72 insertions(+), 211 deletions(-) diff --git a/examples/load_and_explore_bboxes.py b/examples/load_and_explore_bboxes.py index 000dc561..b6443418 100644 --- a/examples/load_and_explore_bboxes.py +++ b/examples/load_and_explore_bboxes.py @@ -1,233 +1,94 @@ -"""Inspect crab trajectories""" +"""Load and explore bboxes tracks 
+=============================== -# %% -import ast -import itertools -from pathlib import Path +Load and explore an example dataset of bounding boxes tracks. +""" -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd +# %% +# Imports +# ------- +# For interactive plots: install ipympl with `pip install ipympl` and uncomment +# the following line in your notebook +# %matplotlib widget from cycler import cycler +from matplotlib import pyplot as plt +from movement import sample_data from movement.io import load_bboxes -# %matplotlib widget - -# %%%%%%%%%%%%%%%%%%%%% -# input data -file_csv = ( - "/Users/sofia/arc/project_Zoo_crabs/escape_clips/" - "crabs_track_output_selected_clips/04.09.2023-04-Right_RE_test/predicted_tracks.csv" -) - - -# load ground truth! -groundtruth_csv = ( - "/Users/sofia/arc/project_Zoo_crabs/escape_clips/" - "04.09.2023-04-Right_RE_test_corrected_ST_csv_SM.csv" -) - -# %%%%%%%%%%%%%%%%%%%%%%%%% -# Fix ground truth file -df = pd.read_csv(groundtruth_csv, sep=",", header=0) - -# find duplicates -list_unique_filenames = list(set(df.filename)) -filenames_to_rep_ID = {} -for file in list_unique_filenames: - df_one_filename = df.loc[df["filename"] == file] - - list_track_ids_one_filename = [ - int(ast.literal_eval(row.region_attributes)["track"]) - for row in df_one_filename.itertuples() - ] - - if len(set(list_track_ids_one_filename)) != len( - list_track_ids_one_filename - ): - # [ - # list_track_ids_one_filename.remove(k) - # for k in set(list_track_ids_one_filename) - # ] # there could be more than one duplicate!!! 
- for k in set(list_track_ids_one_filename): - list_track_ids_one_filename.remove(k) # remove first occurrence - - filenames_to_rep_ID[file] = list_track_ids_one_filename - -# delete duplicate rows -for file, list_rep_ID in filenames_to_rep_ID.items(): - for rep_ID in list_rep_ID: - # find repeated rows for selected file and rep_ID - matching_rows = df[ - (df["filename"] == file) - & (df["region_attributes"] == f'{{"track":"{rep_ID}"}}') - ] - - # Identify the index of the first matching row - if not matching_rows.empty: - indices_to_drop = matching_rows.index[1:] - - # Drop all but the first matching row - df = df.drop(indices_to_drop) - -# save to csv -groundtruth_csv_corrected = Path(groundtruth_csv).parent / Path( - Path(groundtruth_csv).stem + "_corrected.csv" -) -df.to_csv(groundtruth_csv_corrected, index=False) - -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Read corrected ground truth as movement dataset -ds_gt = load_bboxes.from_via_tracks_file( - groundtruth_csv_corrected, fps=None, use_frame_numbers_from_file=False -) -print(ds_gt) - -# Print summary -print(f"{ds_gt.source_file}") -print(f"Number of frames: {ds_gt.sizes['time']}") -print(f"Number of individuals: {ds_gt.sizes['individuals']}") - -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Read predictions as movement dataset -ds_pred = load_bboxes.from_via_tracks_file( - file_csv, fps=None, use_frame_numbers_from_file=False -) -print(ds_pred) - -# Print summary -print(f"{ds_pred.source_file}") -print(f"Number of frames: {ds_pred.sizes['time']}") -print(f"Number of individuals: {ds_pred.sizes['individuals']}") - - -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Check when individuals are labelled -# check x and y coordinates are nan at the same locations -# TODO: change colormap to white and blue -assert ( - np.isnan(ds_gt.position.data[:, :, 0]) - == np.isnan(ds_gt.position.data[:, :, 1]) -).all() - -fig, axs = plt.subplots(2, 1, sharex=True) - 
-axs[0].matshow(np.isnan(ds_gt.position.data[:, :, 0]).T, aspect="auto") -axs[0].set_title("Ground truth") -axs[0].set_xlabel("time (frames)") -axs[0].set_ylabel("individual") - -axs[1].matshow(np.isnan(ds_pred.position.data[:, :, 0]).T, aspect="auto") -axs[1].set_title("Prediction") -axs[1].set_xlabel("time (frames)") -axs[1].set_ylabel("tracks") -axs[1].xaxis.tick_bottom() - -# # add reference -# axs[1].hlines( -# y=ds_gt.sizes["individuals"], -# xmin=0, -# xmax=ds_gt.sizes["time"] - 1, -# color="red", -# ) - -fig.subplots_adjust(hspace=0.6, wspace=0.5) - -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Compare lengths of continuous tracks & plot distrib - -# for each individual, find the length of chunks between nans -map_individuals_to_chunks = {} -for individual in range(ds_gt.sizes["individuals"]): - # find nans in x-coord for that individual - nan_idx = np.isnan(ds_gt.position.data[:, individual, 0]) - - # find lengths of continuous tracks - len_chunks = [ - len(list(group_iter)) - for key, group_iter in itertools.groupby(nan_idx) - if not key - ] - - map_individuals_to_chunks[individual] = len_chunks - # %% -fig, ax = plt.subplots(1, 1) -for ind, list_chunks in map_individuals_to_chunks.items(): - ax.scatter([ind] * len(list_chunks), list_chunks) +# Select sample data file +# -------------------- +# For the sake of this example, we will use the path to one of +# the sample datasets provided with ``movement``. 
+file_path = sample_data.fetch_dataset_paths( + "VIA_multiple-crabs_5-frames_labels.csv" +)["bboxes"] +print(file_path) -# [sum(1 for _ in input) for _, input in itertools.groupby(_)] +# %% +# Read file as a `movement` dataset +# ---------------------------------- +ds = load_bboxes.from_via_tracks_file(file_path) -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Check confidence of detections -confidence_values = ds_pred.confidence.data.flatten() -nan_median_confidence = np.nanmedian(confidence_values) +# print some information about the dataset +print(ds) +print("-----") +print(f"Number of individuals: {ds.sizes['individuals']}") +print(f"Number of frames: {ds.sizes['time']}") -fig, ax = plt.subplots(1, 1) -hist = ax.hist(confidence_values, bins=np.arange(0, 1.01, 0.05)) -ax.vlines(x=nan_median_confidence, ymin=0, ymax=max(hist[0]), color="red") -ax.set_aspect("auto") +# %% +# The dataset contains bounding boxes for 86 individuals, tracked for +# 5 frames, in the xy plane. +# +# We can also see from the printout of the dataset that it contains +# three data arrays: ``position``, ``shape`` and ``confidence``. +# +# We will use these three arrays in the following sections to produce +# informative plots of the tracked trajectories +# %% +# Plot trajectories and color by individual +# ----------------------------------------- -fig, ax = plt.subplots(1, 1) -ax.hist(ds_pred.confidence.data.flatten(), bins=np.arange(0.6, 1.01, 0.01)) -ax.vlines(x=nan_median_confidence, ymin=0, ymax=max(hist[0]), color="red") -ax.set_aspect("auto") +fig, ax = plt.subplots(1, 1) # , figsize=(15, 15)) -print(f"Median confidence: {nan_median_confidence}") +# add color cycler to axes +plt.rcParams["axes.prop_cycle"] = cycler(color=plt.get_cmap("tab10").colors) +# get the list of colors in the cycle +color_cycle = plt.rcParams["axes.prop_cycle"].by_key()["color"] -# %% -# plot all trajectories -# ds.position ---> time, individuals, space -# why noise? remove low predictions? 
-for ds, title in zip( - [ds_gt, ds_pred], ["Ground truth", "Prediction"], strict=False -): - # cmap = plt.get_cmap('tab10') - fig, ax = plt.subplots(1, 1) - plt.rcParams["axes.prop_cycle"] = cycler( - color=plt.get_cmap("tab10").colors +for id_idx, id_str in enumerate(ds["individuals"].data): + ax.scatter( + x=ds.position.sel(individuals=id_str, space="x").data, + y=ds.position.sel(individuals=id_str, space="y").data, + s=1, + color=color_cycle[id_idx % len(color_cycle)], + ) + # find first frame with non-nan x-coord + start_frame = ds.time[ + ~ds.position.sel(individuals="id_1", space="y").isnull().data + ][0] + ax.text( + x=ds.position.sel( + time=start_frame, individuals=id_str, space="x" + ).data, + y=ds.position.sel( + time=start_frame, individuals=id_str, space="y" + ).data, + s=str(id_str), + horizontalalignment="center", + color=color_cycle[id_idx % len(color_cycle)], ) - for ind_idx in range(ds.sizes["individuals"]): - ax.scatter( - x=ds.position[:, ind_idx, 0], # nframes, nindividuals, x - y=ds.position[:, ind_idx, 1], - s=1, - # c=cmap(ind_idx), - ) - ax.set_aspect("equal") - ax.set_ylim(-150, 2500) - ax.set_xlabel("x (pixels)") - ax.set_ylabel("y (pixels)") - ax.set_title(title) - plt.show() - -# %% -# first 10 individuals -fig, ax = plt.subplots(1, 1) - -ax.scatter(x=ds_pred.position[:, :10, 0], y=ds_pred.position[:, :10, 1], s=1) +ax.invert_yaxis() # OJO! 
+# ax.set_ylim(0, 2160) +# ax.set_xlim(0, 4096) ax.set_aspect("equal") ax.set_xlabel("x (pixels)") ax.set_ylabel("y (pixels)") +plt.show() + # %% -# groupby -# It generates a break or new group every time the value of the key function -# changes -# input = ( -# np.isnan(ds_gt.position.data[:,0,0]*ds_gt.position.data[:,0,1] -# ).astype(int)) -input = [0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1] -len_per_chunk = [ - (key, len(list(group_iter))) - for key, group_iter in itertools.groupby(input) -] -len_per_chunk_with_1 = [ - len(list(group_iter)) - for key, group_iter in itertools.groupby(input) - if key == 1 -] From 2cf0cb847c3941d6963873300f64cbe81a8e2827 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 12 Sep 2024 13:03:57 +0100 Subject: [PATCH 03/25] Sherlock example --- examples/load_and_explore_bboxes_sherlock.py | 141 +++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 examples/load_and_explore_bboxes_sherlock.py diff --git a/examples/load_and_explore_bboxes_sherlock.py b/examples/load_and_explore_bboxes_sherlock.py new file mode 100644 index 00000000..30bf39be --- /dev/null +++ b/examples/load_and_explore_bboxes_sherlock.py @@ -0,0 +1,141 @@ +"""Load and explore bboxes tracks +=============================== + +Load and explore an example dataset of bounding boxes tracks. 
+""" + +# %% +# Imports +# ------- +# For interactive plots: install ipympl with `pip install ipympl` and uncomment +# the following line in your notebook +# %matplotlib widget +from pathlib import Path + +from cycler import cycler +from matplotlib import pyplot as plt + +from movement.io import load_bboxes + +# %% +# Select sample data file +# -------------------- +# - Download data from https://www.robots.ox.ac.uk/~vgg/software/via/docs/face_track_annotation.html +# - Edit name of files to include `frame_` + +# %% +# Read file as a `movement` dataset +# ---------------------------------- +file_path = ( + Path.home() + / "Downloads" + / "face_track_annotation" + / "data" + / "sherlock_ep01_tracks_FRCNN_SM.csv" +) +ds = load_bboxes.from_via_tracks_file( + str(file_path), use_frame_numbers_from_file=True +) + +# restrict to first shot only +start_end_frames_shot_1 = (11384, 11586) +frames_shot_1 = list( + range(start_end_frames_shot_1[0], start_end_frames_shot_1[1], 1) +) +ds = ds.sel(time=frames_shot_1).copy() # I think I need copy? + +# remove individuals whose position is nan for all frames in the shot +# bool_individuals_all_nan = np.all(np.isnan(ds.position.data), axis=(0, 2)) +# ds = ds.drop_sel(individuals=ds.individuals.data[bool_individuals_all_nan]) +ds = ds.dropna(dim="individuals", how="all") + +# print some information about the dataset +print(ds) +print("-----") +print(f"Number of individuals: {ds.sizes['individuals']}") +print(f"Number of frames: {ds.sizes['time']}") + + +# %% +# The reduced dataset contains bounding boxes for 2 individuals, tracked for +# 202 frames, in the xy plane. +# +# We can also see from the printout of the dataset that it contains +# three data arrays: ``position``, ``shape`` and ``confidence``. 
+# %% +# Plot trajectories of first shot and color by individual +# ----------------------------------------- + +fig, ax = plt.subplots(1, 1) + +# add color cycler to axes +plt.rcParams["axes.prop_cycle"] = cycler(color=plt.get_cmap("tab10").colors) +# get the list of colors in the cycle +color_cycle = plt.rcParams["axes.prop_cycle"].by_key()["color"] + + +frame_number = frames_shot_1[0] +img = plt.imread( + str(file_path.parents[1] / "images" / f"{frame_number:08}.jpg") +) + +for id_idx, id_str in enumerate(ds["individuals"].data): + # plot first frame + ax.imshow(img) + + past_frames = [f for f in frames_shot_1 if f <= frame_number] + future_frames = [f for f in frames_shot_1 if f > frame_number] + + # plot past position of centroid in grey + ax.scatter( + x=ds.position.sel( + individuals=id_str, time=past_frames, space="x" + ).data, + y=ds.position.sel( + individuals=id_str, time=past_frames, space="y" + ).data, + s=1, + color="grey", + ) + # plot future trajectories of centroids in color + ax.scatter( + x=ds.position.sel( + individuals=id_str, time=future_frames, space="x" + ).data, + y=ds.position.sel( + individuals=id_str, time=future_frames, space="y" + ).data, + s=1, + color=color_cycle[id_idx % len(color_cycle)], + ) + # plot bbox in this frame + # ATT! 
currently position is the top left corner of bbox + # need to uncomment the line below if position loaded is centroid + top_left_corner = ( + ds.position.sel(individuals=id_str, time=frame_number).data + # - ds.shape.sel(individuals=id_str, time=frame_number).data / 2 + ) + bbox = plt.Rectangle( + xy=tuple(top_left_corner), + width=ds.shape.sel( + individuals=id_str, time=frame_number, space="x" + ).data, + height=ds.shape.sel( + individuals=id_str, time=frame_number, space="y" + ).data, + edgecolor=color_cycle[id_idx % len(color_cycle)], + facecolor="none", # transparent fill + linewidth=1.5, + ) + ax.add_patch(bbox) + + +# ax.legend(ds["individuals"].data, bbox_to_anchor=(1.0, 1.0)) +# ax.invert_yaxis() +ax.set_aspect("equal") +ax.set_xlabel("x (pixels)") +ax.set_ylabel("y (pixels)") +ax.set_title(f"Sherlock - shot 1, frame {frame_number}") +plt.show() + +# %% From b847f2e2b1840eac2a42747b463de3cfece5cc0c Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 12 Sep 2024 14:21:39 +0100 Subject: [PATCH 04/25] Draft example for MOCA clip --- examples/load_and_explore_bboxes_moca.py | 137 +++++++++++++++++++++++ 1 file changed, 137 insertions(+) create mode 100644 examples/load_and_explore_bboxes_moca.py diff --git a/examples/load_and_explore_bboxes_moca.py b/examples/load_and_explore_bboxes_moca.py new file mode 100644 index 00000000..ae69a7ca --- /dev/null +++ b/examples/load_and_explore_bboxes_moca.py @@ -0,0 +1,137 @@ +"""Load and explore bboxes tracks +=============================== + +Load and explore an example dataset of bounding boxes tracks. 
+""" + +# %% +# Imports +# ------- +# For interactive plots: install ipympl with `pip install ipympl` and uncomment +# the following line in your notebook +# %matplotlib widget +from pathlib import Path + +from cycler import cycler +from matplotlib import pyplot as plt + +from movement.io import load_bboxes + +# %% +# Select sample data file +# -------------------- +# - Download data from https://www.robots.ox.ac.uk/~vgg/software/via/docs/face_track_annotation.html +# - Edit name of files to include `frame_` + +# %% +# Read file as a `movement` dataset +# ---------------------------------- +file_path = ( + Path.home() + / "swc" + / "project_movement_dataloader" + / "bboxes-datasets" + / "MoCA" + / "JPEGImages" + / "moca_crab_1_clip.csv" +) +img_dir = file_path.parent / "crab_1" + +# %% +ds = load_bboxes.from_via_tracks_file( + str(file_path), + use_frame_numbers_from_file=False, + # ATT! extracted frames are not consecutive! +) + +# print some information about the dataset +print(ds) +print("-----") +print(f"Number of individuals: {ds.sizes['individuals']}") +print(f"Number of frames: {ds.sizes['time']}") + + +# %% +# The reduced dataset contains bounding boxes for 2 individuals, tracked for +# 202 frames, in the xy plane. +# +# We can also see from the printout of the dataset that it contains +# three data arrays: ``position``, ``shape`` and ``confidence``. +# %% +# Plot trajectories of first shot and color by individual +# ----------------------------------------- + +fig, ax = plt.subplots(1, 1) + +# add color cycler to axes +plt.rcParams["axes.prop_cycle"] = cycler(color=plt.get_cmap("tab10").colors) +# get the list of colors in the cycle +color_cycle = plt.rcParams["axes.prop_cycle"].by_key()["color"] + + +frame_number = 0 # ATT! extracted frames are not consecutive! 
+img = plt.imread(str(img_dir / f"{frame_number:05}.jpg")) + +for id_idx, id_str in enumerate(ds["individuals"].data): + # plot frame + ax.imshow(img) + + past_frames = [f for f in ds.time.data if f <= frame_number] + future_frames = [f for f in ds.time.data if f > frame_number] + + # plot past position of centroid in grey + ax.scatter( + x=ds.position.sel( + individuals=id_str, time=past_frames, space="x" + ).data, + y=ds.position.sel( + individuals=id_str, time=past_frames, space="y" + ).data, + s=1, + color="grey", + ) + + # plot future trajectories of centroids in color + ax.scatter( + x=ds.position.sel( + individuals=id_str, time=future_frames, space="x" + ).data, + y=ds.position.sel( + individuals=id_str, time=future_frames, space="y" + ).data, + s=1, + color=color_cycle[id_idx % len(color_cycle)], + ) + + # plot bbox in this frame + # ATT! currently position is the top left corner of bbox + # need to uncomment the line below if position loaded is centroid + # (after fix) + top_left_corner = ( + ds.position.sel(individuals=id_str, time=frame_number).data + # - ds.shape.sel(individuals=id_str, time=frame_number).data / 2 + ) + bbox = plt.Rectangle( + xy=tuple(top_left_corner), + width=ds.shape.sel( + individuals=id_str, time=frame_number, space="x" + ).data, + height=ds.shape.sel( + individuals=id_str, time=frame_number, space="y" + ).data, + edgecolor=color_cycle[id_idx % len(color_cycle)], + facecolor="none", # transparent fill + linewidth=1.5, + ) + ax.add_patch(bbox) + + +# ax.legend(ds["individuals"].data, bbox_to_anchor=(1.0, 1.0)) +# ax.invert_yaxis() +ax.set_aspect("equal") +ax.set_xlabel("x (pixels)") +ax.set_ylabel("y (pixels)") +ax.set_title(f"MoCA {img_dir}, frame {frame_number}") +plt.show() + +# %% From 2f236f5995ed1e1ac7b4f88cd144c677321e340a Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Tue, 17 Sep 2024 16:29:58 +0100 Subject: [PATCH 05/25] Add reindex and interpolate example --- 
examples/reindex_and_interpolate.py | 77 +++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 examples/reindex_and_interpolate.py diff --git a/examples/reindex_and_interpolate.py b/examples/reindex_and_interpolate.py new file mode 100644 index 00000000..7c211865 --- /dev/null +++ b/examples/reindex_and_interpolate.py @@ -0,0 +1,77 @@ +"""Reindex and interpolate bboxes tracks +=============================== + +Load and explore an example dataset of bounding boxes tracks. +""" + +# %% +from movement import sample_data +from movement.filtering import interpolate_over_time +from movement.io import load_bboxes + +# %% +# Select sample data file +# -------------------- +# For the sake of this example, we will use the path to one of +# the sample datasets provided with ``movement``. + +file_path = sample_data.fetch_dataset_paths("VIA_single-crab_MOCA-crab-1.csv")[ + "bboxes" +] +print(file_path) + +ds = load_bboxes.from_via_tracks_file( + file_path, use_frame_numbers_from_file=True +) + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Only 1 in 5 frames are labelled! 
+print(ds) +print(ds.time) +print(ds.position.data[:, 0, :]) + + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Extend the dataset to every frame by forward filling +ds_ff = ds.reindex( + {"time": list(range(ds.time[-1].item()))}, + method="ffill", # propagate last valid index value forward +) + +print(ds_ff.position.data[:, 0, :]) +print(ds_ff.shape.data[:, 0, :]) + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Extend the dataset to every frame and fill empty values with nan +ds_nan = ds.reindex( + {"time": list(range(ds.time[-1].item()))}, + method=None, # default +) + +print("Position data array:") +print(ds_nan.position.data[:11, 0, :]) + +print("Shape data array:") +print(ds_nan.shape.data[:11, 0, :]) + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Linearly interpolate position and shape with nan + +ds_interp = ds_nan.copy() + +for data_array_str in ["position", "shape"]: + ds_interp[data_array_str] = interpolate_over_time( + data=ds_interp[data_array_str], + method="linear", + max_gap=None, + print_report=False, + ) + +print("Position data array:") +print(ds_interp.position.data[:11, 0, :]) + +print("Shape data array:") +print(ds_interp.shape.data[:11, 0, :]) + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Export as csv file From fcee8be5e6955816c6aef7cea195b87f5dc4bc6f Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Mon, 2 Dec 2024 10:58:02 +0000 Subject: [PATCH 06/25] Remove multi crabs example --- examples/load_and_explore_bboxes.py | 94 ----------------------------- 1 file changed, 94 deletions(-) delete mode 100644 examples/load_and_explore_bboxes.py diff --git a/examples/load_and_explore_bboxes.py b/examples/load_and_explore_bboxes.py deleted file mode 100644 index b6443418..00000000 --- a/examples/load_and_explore_bboxes.py +++ /dev/null @@ -1,94 +0,0 @@ -"""Load and explore bboxes tracks -=============================== - -Load and explore an example dataset of bounding boxes 
tracks. -""" - -# %% -# Imports -# ------- -# For interactive plots: install ipympl with `pip install ipympl` and uncomment -# the following line in your notebook -# %matplotlib widget -from cycler import cycler -from matplotlib import pyplot as plt - -from movement import sample_data -from movement.io import load_bboxes - -# %% -# Select sample data file -# -------------------- -# For the sake of this example, we will use the path to one of -# the sample datasets provided with ``movement``. - -file_path = sample_data.fetch_dataset_paths( - "VIA_multiple-crabs_5-frames_labels.csv" -)["bboxes"] -print(file_path) - -# %% -# Read file as a `movement` dataset -# ---------------------------------- -ds = load_bboxes.from_via_tracks_file(file_path) - -# print some information about the dataset -print(ds) -print("-----") -print(f"Number of individuals: {ds.sizes['individuals']}") -print(f"Number of frames: {ds.sizes['time']}") - - -# %% -# The dataset contains bounding boxes for 86 individuals, tracked for -# 5 frames, in the xy plane. -# -# We can also see from the printout of the dataset that it contains -# three data arrays: ``position``, ``shape`` and ``confidence``. 
-# -# We will use these three arrays in the following sections to produce -# informative plots of the tracked trajectories -# %% -# Plot trajectories and color by individual -# ----------------------------------------- - -fig, ax = plt.subplots(1, 1) # , figsize=(15, 15)) - -# add color cycler to axes -plt.rcParams["axes.prop_cycle"] = cycler(color=plt.get_cmap("tab10").colors) -# get the list of colors in the cycle -color_cycle = plt.rcParams["axes.prop_cycle"].by_key()["color"] - - -for id_idx, id_str in enumerate(ds["individuals"].data): - ax.scatter( - x=ds.position.sel(individuals=id_str, space="x").data, - y=ds.position.sel(individuals=id_str, space="y").data, - s=1, - color=color_cycle[id_idx % len(color_cycle)], - ) - # find first frame with non-nan x-coord - start_frame = ds.time[ - ~ds.position.sel(individuals="id_1", space="y").isnull().data - ][0] - ax.text( - x=ds.position.sel( - time=start_frame, individuals=id_str, space="x" - ).data, - y=ds.position.sel( - time=start_frame, individuals=id_str, space="y" - ).data, - s=str(id_str), - horizontalalignment="center", - color=color_cycle[id_idx % len(color_cycle)], - ) - -ax.invert_yaxis() # OJO! 
-# ax.set_ylim(0, 2160) -# ax.set_xlim(0, 4096) -ax.set_aspect("equal") -ax.set_xlabel("x (pixels)") -ax.set_ylabel("y (pixels)") -plt.show() - -# %% From 8662e157e14df7328338aba770a8e77e45f92155 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Mon, 2 Dec 2024 10:58:11 +0000 Subject: [PATCH 07/25] Fix sherlock example --- examples/load_and_explore_bboxes_sherlock.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/load_and_explore_bboxes_sherlock.py b/examples/load_and_explore_bboxes_sherlock.py index 30bf39be..559d59c9 100644 --- a/examples/load_and_explore_bboxes_sherlock.py +++ b/examples/load_and_explore_bboxes_sherlock.py @@ -113,7 +113,7 @@ # need to uncomment the line below if position loaded is centroid top_left_corner = ( ds.position.sel(individuals=id_str, time=frame_number).data - # - ds.shape.sel(individuals=id_str, time=frame_number).data / 2 + - ds.shape.sel(individuals=id_str, time=frame_number).data / 2 ) bbox = plt.Rectangle( xy=tuple(top_left_corner), From d2c48ca249e4dd9f4e92ee4158080b3a597926b0 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Mon, 2 Dec 2024 10:58:26 +0000 Subject: [PATCH 08/25] Remove sherlock example --- examples/load_and_explore_bboxes_sherlock.py | 141 ------------------- 1 file changed, 141 deletions(-) delete mode 100644 examples/load_and_explore_bboxes_sherlock.py diff --git a/examples/load_and_explore_bboxes_sherlock.py b/examples/load_and_explore_bboxes_sherlock.py deleted file mode 100644 index 559d59c9..00000000 --- a/examples/load_and_explore_bboxes_sherlock.py +++ /dev/null @@ -1,141 +0,0 @@ -"""Load and explore bboxes tracks -=============================== - -Load and explore an example dataset of bounding boxes tracks. 
-""" - -# %% -# Imports -# ------- -# For interactive plots: install ipympl with `pip install ipympl` and uncomment -# the following line in your notebook -# %matplotlib widget -from pathlib import Path - -from cycler import cycler -from matplotlib import pyplot as plt - -from movement.io import load_bboxes - -# %% -# Select sample data file -# -------------------- -# - Download data from https://www.robots.ox.ac.uk/~vgg/software/via/docs/face_track_annotation.html -# - Edit name of files to include `frame_` - -# %% -# Read file as a `movement` dataset -# ---------------------------------- -file_path = ( - Path.home() - / "Downloads" - / "face_track_annotation" - / "data" - / "sherlock_ep01_tracks_FRCNN_SM.csv" -) -ds = load_bboxes.from_via_tracks_file( - str(file_path), use_frame_numbers_from_file=True -) - -# restrict to first shot only -start_end_frames_shot_1 = (11384, 11586) -frames_shot_1 = list( - range(start_end_frames_shot_1[0], start_end_frames_shot_1[1], 1) -) -ds = ds.sel(time=frames_shot_1).copy() # I think I need copy? - -# remove individuals whose position is nan for all frames in the shot -# bool_individuals_all_nan = np.all(np.isnan(ds.position.data), axis=(0, 2)) -# ds = ds.drop_sel(individuals=ds.individuals.data[bool_individuals_all_nan]) -ds = ds.dropna(dim="individuals", how="all") - -# print some information about the dataset -print(ds) -print("-----") -print(f"Number of individuals: {ds.sizes['individuals']}") -print(f"Number of frames: {ds.sizes['time']}") - - -# %% -# The reduced dataset contains bounding boxes for 2 individuals, tracked for -# 202 frames, in the xy plane. -# -# We can also see from the printout of the dataset that it contains -# three data arrays: ``position``, ``shape`` and ``confidence``. 
-# %% -# Plot trajectories of first shot and color by individual -# ----------------------------------------- - -fig, ax = plt.subplots(1, 1) - -# add color cycler to axes -plt.rcParams["axes.prop_cycle"] = cycler(color=plt.get_cmap("tab10").colors) -# get the list of colors in the cycle -color_cycle = plt.rcParams["axes.prop_cycle"].by_key()["color"] - - -frame_number = frames_shot_1[0] -img = plt.imread( - str(file_path.parents[1] / "images" / f"{frame_number:08}.jpg") -) - -for id_idx, id_str in enumerate(ds["individuals"].data): - # plot first frame - ax.imshow(img) - - past_frames = [f for f in frames_shot_1 if f <= frame_number] - future_frames = [f for f in frames_shot_1 if f > frame_number] - - # plot past position of centroid in grey - ax.scatter( - x=ds.position.sel( - individuals=id_str, time=past_frames, space="x" - ).data, - y=ds.position.sel( - individuals=id_str, time=past_frames, space="y" - ).data, - s=1, - color="grey", - ) - # plot future trajectories of centroids in color - ax.scatter( - x=ds.position.sel( - individuals=id_str, time=future_frames, space="x" - ).data, - y=ds.position.sel( - individuals=id_str, time=future_frames, space="y" - ).data, - s=1, - color=color_cycle[id_idx % len(color_cycle)], - ) - # plot bbox in this frame - # ATT! 
currently position is the top left corner of bbox - # need to uncomment the line below if position loaded is centroid - top_left_corner = ( - ds.position.sel(individuals=id_str, time=frame_number).data - - ds.shape.sel(individuals=id_str, time=frame_number).data / 2 - ) - bbox = plt.Rectangle( - xy=tuple(top_left_corner), - width=ds.shape.sel( - individuals=id_str, time=frame_number, space="x" - ).data, - height=ds.shape.sel( - individuals=id_str, time=frame_number, space="y" - ).data, - edgecolor=color_cycle[id_idx % len(color_cycle)], - facecolor="none", # transparent fill - linewidth=1.5, - ) - ax.add_patch(bbox) - - -# ax.legend(ds["individuals"].data, bbox_to_anchor=(1.0, 1.0)) -# ax.invert_yaxis() -ax.set_aspect("equal") -ax.set_xlabel("x (pixels)") -ax.set_ylabel("y (pixels)") -ax.set_title(f"Sherlock - shot 1, frame {frame_number}") -plt.show() - -# %% From 39d3960ac1c37e7d983a7d668780add900a7123e Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Mon, 2 Dec 2024 11:18:15 +0000 Subject: [PATCH 09/25] Fix moca clip centroid vs top left corner --- examples/load_and_explore_bboxes_moca.py | 155 +++++++++++------------ 1 file changed, 73 insertions(+), 82 deletions(-) diff --git a/examples/load_and_explore_bboxes_moca.py b/examples/load_and_explore_bboxes_moca.py index ae69a7ca..ac32d1df 100644 --- a/examples/load_and_explore_bboxes_moca.py +++ b/examples/load_and_explore_bboxes_moca.py @@ -10,34 +10,25 @@ # For interactive plots: install ipympl with `pip install ipympl` and uncomment # the following line in your notebook # %matplotlib widget -from pathlib import Path +# from pathlib import Path from cycler import cycler from matplotlib import pyplot as plt +from movement import sample_data from movement.io import load_bboxes # %% # Select sample data file # -------------------- -# - Download data from https://www.robots.ox.ac.uk/~vgg/software/via/docs/face_track_annotation.html -# - Edit name of files to include `frame_` 
+file_path = sample_data.fetch_dataset_paths("VIA_single-crab_MOCA-crab-1.csv")[ + "bboxes" +] +print(file_path) # %% # Read file as a `movement` dataset # ---------------------------------- -file_path = ( - Path.home() - / "swc" - / "project_movement_dataloader" - / "bboxes-datasets" - / "MoCA" - / "JPEGImages" - / "moca_crab_1_clip.csv" -) -img_dir = file_path.parent / "crab_1" - -# %% ds = load_bboxes.from_via_tracks_file( str(file_path), use_frame_numbers_from_file=False, @@ -52,14 +43,14 @@ # %% -# The reduced dataset contains bounding boxes for 2 individuals, tracked for -# 202 frames, in the xy plane. +# The dataset contains bounding boxes for 1 individual, tracked for +# 35 frames, in the xy plane. # # We can also see from the printout of the dataset that it contains # three data arrays: ``position``, ``shape`` and ``confidence``. # %% # Plot trajectories of first shot and color by individual -# ----------------------------------------- +# ------------------------------------------------------- fig, ax = plt.subplots(1, 1) @@ -69,69 +60,69 @@ color_cycle = plt.rcParams["axes.prop_cycle"].by_key()["color"] -frame_number = 0 # ATT! extracted frames are not consecutive! 
-img = plt.imread(str(img_dir / f"{frame_number:05}.jpg")) - -for id_idx, id_str in enumerate(ds["individuals"].data): - # plot frame - ax.imshow(img) - - past_frames = [f for f in ds.time.data if f <= frame_number] - future_frames = [f for f in ds.time.data if f > frame_number] - - # plot past position of centroid in grey - ax.scatter( - x=ds.position.sel( - individuals=id_str, time=past_frames, space="x" - ).data, - y=ds.position.sel( - individuals=id_str, time=past_frames, space="y" - ).data, - s=1, - color="grey", - ) - - # plot future trajectories of centroids in color - ax.scatter( - x=ds.position.sel( - individuals=id_str, time=future_frames, space="x" - ).data, - y=ds.position.sel( - individuals=id_str, time=future_frames, space="y" - ).data, - s=1, - color=color_cycle[id_idx % len(color_cycle)], - ) - - # plot bbox in this frame - # ATT! currently position is the top left corner of bbox - # need to uncomment the line below if position loaded is centroid - # (after fix) - top_left_corner = ( - ds.position.sel(individuals=id_str, time=frame_number).data - # - ds.shape.sel(individuals=id_str, time=frame_number).data / 2 - ) - bbox = plt.Rectangle( - xy=tuple(top_left_corner), - width=ds.shape.sel( - individuals=id_str, time=frame_number, space="x" - ).data, - height=ds.shape.sel( - individuals=id_str, time=frame_number, space="y" - ).data, - edgecolor=color_cycle[id_idx % len(color_cycle)], - facecolor="none", # transparent fill - linewidth=1.5, - ) - ax.add_patch(bbox) - - -# ax.legend(ds["individuals"].data, bbox_to_anchor=(1.0, 1.0)) -# ax.invert_yaxis() -ax.set_aspect("equal") -ax.set_xlabel("x (pixels)") -ax.set_ylabel("y (pixels)") -ax.set_title(f"MoCA {img_dir}, frame {frame_number}") -plt.show() +# frame_number = 0 # ATT! extracted frames are not consecutive! 
+# img = plt.imread(str(img_dir / f"{frame_number:05}.jpg")) + +# for id_idx, id_str in enumerate(ds["individuals"].data): +# # plot frame +# ax.imshow(img) + +# past_frames = [f for f in ds.time.data if f <= frame_number] +# future_frames = [f for f in ds.time.data if f > frame_number] + +# # plot past position of centroid in grey +# ax.scatter( +# x=ds.position.sel( +# individuals=id_str, time=past_frames, space="x" +# ).data, +# y=ds.position.sel( +# individuals=id_str, time=past_frames, space="y" +# ).data, +# s=1, +# color="grey", +# ) + +# # plot future trajectories of centroids in color +# ax.scatter( +# x=ds.position.sel( +# individuals=id_str, time=future_frames, space="x" +# ).data, +# y=ds.position.sel( +# individuals=id_str, time=future_frames, space="y" +# ).data, +# s=1, +# color=color_cycle[id_idx % len(color_cycle)], +# ) + +# # plot bbox in this frame +# # ATT! currently position is the top left corner of bbox +# # need to uncomment the line below if position loaded is centroid +# # (after fix) +# top_left_corner = ( +# ds.position.sel(individuals=id_str, time=frame_number).data +# - ds.shape.sel(individuals=id_str, time=frame_number).data / 2 +# ) +# bbox = plt.Rectangle( +# xy=tuple(top_left_corner), +# width=ds.shape.sel( +# individuals=id_str, time=frame_number, space="x" +# ).data, +# height=ds.shape.sel( +# individuals=id_str, time=frame_number, space="y" +# ).data, +# edgecolor=color_cycle[id_idx % len(color_cycle)], +# facecolor="none", # transparent fill +# linewidth=1.5, +# ) +# ax.add_patch(bbox) + + +# # ax.legend(ds["individuals"].data, bbox_to_anchor=(1.0, 1.0)) +# # ax.invert_yaxis() +# ax.set_aspect("equal") +# ax.set_xlabel("x (pixels)") +# ax.set_ylabel("y (pixels)") +# ax.set_title(f"MoCA {img_dir}, frame {frame_number}") +# plt.show() # %% From 4174b6d35c9ce151ce9bd3644bce95c7cceba59f Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Mon, 2 Dec 2024 13:48:33 +0000 Subject: [PATCH 10/25] Add 
plot to reindexing example and rename --- examples/reindex_and_interpolate.py | 77 -------- examples/reindex_and_interpolate_bboxes.py | 220 +++++++++++++++++++++ 2 files changed, 220 insertions(+), 77 deletions(-) delete mode 100644 examples/reindex_and_interpolate.py create mode 100644 examples/reindex_and_interpolate_bboxes.py diff --git a/examples/reindex_and_interpolate.py b/examples/reindex_and_interpolate.py deleted file mode 100644 index 7c211865..00000000 --- a/examples/reindex_and_interpolate.py +++ /dev/null @@ -1,77 +0,0 @@ -"""Reindex and interpolate bboxes tracks -=============================== - -Load and explore an example dataset of bounding boxes tracks. -""" - -# %% -from movement import sample_data -from movement.filtering import interpolate_over_time -from movement.io import load_bboxes - -# %% -# Select sample data file -# -------------------- -# For the sake of this example, we will use the path to one of -# the sample datasets provided with ``movement``. - -file_path = sample_data.fetch_dataset_paths("VIA_single-crab_MOCA-crab-1.csv")[ - "bboxes" -] -print(file_path) - -ds = load_bboxes.from_via_tracks_file( - file_path, use_frame_numbers_from_file=True -) - -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Only 1 in 5 frames are labelled! 
-print(ds) -print(ds.time) -print(ds.position.data[:, 0, :]) - - -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Extend the dataset to every frame by forward filling -ds_ff = ds.reindex( - {"time": list(range(ds.time[-1].item()))}, - method="ffill", # propagate last valid index value forward -) - -print(ds_ff.position.data[:, 0, :]) -print(ds_ff.shape.data[:, 0, :]) - -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Extend the dataset to every frame and fill empty values with nan -ds_nan = ds.reindex( - {"time": list(range(ds.time[-1].item()))}, - method=None, # default -) - -print("Position data array:") -print(ds_nan.position.data[:11, 0, :]) - -print("Shape data array:") -print(ds_nan.shape.data[:11, 0, :]) - -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Linearly interpolate position and shape with nan - -ds_interp = ds_nan.copy() - -for data_array_str in ["position", "shape"]: - ds_interp[data_array_str] = interpolate_over_time( - data=ds_interp[data_array_str], - method="linear", - max_gap=None, - print_report=False, - ) - -print("Position data array:") -print(ds_interp.position.data[:11, 0, :]) - -print("Shape data array:") -print(ds_interp.shape.data[:11, 0, :]) - -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Export as csv file diff --git a/examples/reindex_and_interpolate_bboxes.py b/examples/reindex_and_interpolate_bboxes.py new file mode 100644 index 00000000..cdddba8f --- /dev/null +++ b/examples/reindex_and_interpolate_bboxes.py @@ -0,0 +1,220 @@ +"""Reindex and interpolate bounding boxes tracks +=============================== + +Load an example dataset of bounding boxes' tracks and reindex +it to every frame. 
+""" + +# %% +import math + +import sleap_io as sio +from cycler import cycler +from matplotlib import pyplot as plt + +from movement import sample_data +from movement.filtering import interpolate_over_time +from movement.io import load_bboxes + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Select sample data file +# -------------------- +# For this example, we will use the path to one of +# the sample datasets provided with ``movement``. + +dataset_dict = sample_data.fetch_dataset_paths( + "VIA_single-crab_MOCA-crab-1.csv", + with_video=True, # for visualisation +) + +file_path = dataset_dict["bboxes"] +print(file_path) + +ds = load_bboxes.from_via_tracks_file( + file_path, use_frame_numbers_from_file=True +) + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Only 1 in 5 frames are annotated, plus the last frame (167) +print(ds) +print("-----") +print(ds.time) + + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Extend the dataset to every frame by forward filling +# The position and shape data arrays are filled with the last valid value +# So position and shape are kept constant when no annotation is available +ds_ff = ds.reindex( + {"time": list(range(ds.time[-1].item()))}, + method="ffill", # propagate last valid index value forward +) + +print("Position data array (first 14 frames):") +print(ds_ff.position.data[:14, 0, :]) # time, individual, space + +print("----") +print("Shape data array (first 14 frames):") +print(ds_ff.shape.data[:14, 0, :]) # time, individual, space + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Extend the dataset to every frame and fill empty values with nan +ds_nan = ds.reindex( + {"time": list(range(ds.time[-1].item()))}, + method=None, # default +) + +print("Position data array (first 14 frames):") +print(ds_nan.position.data[:14, 0, :]) + +print("----") +print("Shape data array (first 14 frames):") +print(ds_nan.shape.data[:14, 0, :]) + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Linearly interpolate position and 
shape with nan + +ds_interp = ds_nan.copy() + +for data_array_str in ["position", "shape"]: + ds_interp[data_array_str] = interpolate_over_time( + data=ds_interp[data_array_str], + method="linear", + max_gap=None, + print_report=False, + ) + +print("Position data array (first 14 frames):") +print(ds_interp.position.data[:14, 0, :]) + +print("----") +print("Shape data array (first 14 frames):") +print(ds_interp.shape.data[:14, 0, :]) + + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Inspect associated video + +video_path = dataset_dict["video"] + + +video = sio.load_video(video_path) + +n_frames, height, width, channels = video.shape + +print(f"Number of frames: {n_frames}") # The video contains all frames +print(f"Frame size: {width}x{height}") +print(f"Number of channels: {channels}") + + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Plot data +# OJO camera movement + +# select indices of data to plot +data_start_idx = 0 +data_end_idx = 11 + +# initialise figure +fig = plt.figure(figsize=(15, 12)) + +# add color cycler to axes +plt.rcParams["axes.prop_cycle"] = cycler(color=plt.get_cmap("tab10").colors) +color_cycle = plt.rcParams["axes.prop_cycle"].by_key()["color"] + +# loop over data and plot over corresponding frame +for p_i, data_idx in enumerate(range(data_start_idx, data_end_idx)): + # add subplot axes + ax = plt.subplot(math.ceil(data_end_idx / 5), 5, p_i + 1) + + # plot frame + ax.imshow( + video[ds.time[data_idx].item()] + ) # the video is indexed at every frame! 
use frame number as index + + # plot annotated boxes + top_left_corner = ( + ds.position[data_idx, 0, :].data - ds.shape[data_idx, 0, :].data / 2 + ) + bbox = plt.Rectangle( + xy=tuple(top_left_corner), + width=ds.shape[data_idx, 0, 0].data, # x coord + height=ds.shape[data_idx, 0, 1].data, # y coord of shape array + edgecolor=color_cycle[0], # [data_idx % len(color_cycle)], + facecolor="none", # transparent fill + linewidth=1.5, + ) + ax.add_patch(bbox) + + ax.set_title(f"Frame {ds.time[data_idx].item()}") + +fig.tight_layout() + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Compare interpolation methods + +# select frames to inspect +frame_number_start = 0 +frame_number_end = 6 + +# add color cycler to axes +plt.rcParams["axes.prop_cycle"] = cycler(color=plt.get_cmap("tab10").colors) +color_cycle = plt.rcParams["axes.prop_cycle"].by_key()["color"] + + +# initialise figure +fig = plt.figure(figsize=(15, 12)) + + +# loop over data and plot over corresponding frame +for frame_n in range(frame_number_start, frame_number_end): + # add subplot axes + ax = plt.subplot(1, 6, frame_n + 1) + + # plot frame + ax.imshow(video[frame_n]) + # the video is indexed at every frame! 
use frame number as index + + # plot bounding box: box and centroid + for ds_i, ds in enumerate([ds_nan, ds_ff, ds_interp]): + # plot box + top_left_corner = ( + ds.position.sel(time=frame_n, individuals="id_1").data + - ds.shape.sel(time=frame_n, individuals="id_1").data / 2 + ) + bbox = plt.Rectangle( + xy=tuple(top_left_corner), + width=ds.shape.sel( + time=frame_n, individuals="id_1", space="x" + ).data, # x coord + height=ds.shape.sel( + time=frame_n, individuals="id_1", space="y" + ).data, # y coord of shape array + edgecolor=color_cycle[ds_i], + facecolor="none", # transparent fill + linewidth=[4.5, 1.5, 1.5][ds_i], + linestyle=["dotted", "solid", "solid"][ds_i], + label=["nan", "ffill", "linear"][ds_i], + ) + ax.add_patch(bbox) + + # plot centroid + ax.scatter( + x=ds.position.sel( + time=frame_n, individuals="id_1", space="x" + ).data, + y=ds.position.sel( + time=frame_n, individuals="id_1", space="y" + ).data, + s=5, + color=color_cycle[ds_i], + ) + + if frame_n == 0: + ax.legend() + ax.set_title(f"Frame {frame_n}") + +fig.tight_layout() + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Export as csv file From 3d8331958c539435b5e90de919a4db2d5382e8b3 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Mon, 2 Dec 2024 16:16:13 +0000 Subject: [PATCH 11/25] Format for example and rename --- examples/load_and_explore_bboxes_moca.py | 128 -------- examples/load_and_reindex_bboxes.py | 323 +++++++++++++++++++++ examples/reindex_and_interpolate_bboxes.py | 220 -------------- 3 files changed, 323 insertions(+), 348 deletions(-) delete mode 100644 examples/load_and_explore_bboxes_moca.py create mode 100644 examples/load_and_reindex_bboxes.py delete mode 100644 examples/reindex_and_interpolate_bboxes.py diff --git a/examples/load_and_explore_bboxes_moca.py b/examples/load_and_explore_bboxes_moca.py deleted file mode 100644 index ac32d1df..00000000 --- a/examples/load_and_explore_bboxes_moca.py +++ /dev/null @@ -1,128 
+0,0 @@ -"""Load and explore bboxes tracks -=============================== - -Load and explore an example dataset of bounding boxes tracks. -""" - -# %% -# Imports -# ------- -# For interactive plots: install ipympl with `pip install ipympl` and uncomment -# the following line in your notebook -# %matplotlib widget -# from pathlib import Path - -from cycler import cycler -from matplotlib import pyplot as plt - -from movement import sample_data -from movement.io import load_bboxes - -# %% -# Select sample data file -# -------------------- -file_path = sample_data.fetch_dataset_paths("VIA_single-crab_MOCA-crab-1.csv")[ - "bboxes" -] -print(file_path) - -# %% -# Read file as a `movement` dataset -# ---------------------------------- -ds = load_bboxes.from_via_tracks_file( - str(file_path), - use_frame_numbers_from_file=False, - # ATT! extracted frames are not consecutive! -) - -# print some information about the dataset -print(ds) -print("-----") -print(f"Number of individuals: {ds.sizes['individuals']}") -print(f"Number of frames: {ds.sizes['time']}") - - -# %% -# The dataset contains bounding boxes for 1 individual, tracked for -# 35 frames, in the xy plane. -# -# We can also see from the printout of the dataset that it contains -# three data arrays: ``position``, ``shape`` and ``confidence``. -# %% -# Plot trajectories of first shot and color by individual -# ------------------------------------------------------- - -fig, ax = plt.subplots(1, 1) - -# add color cycler to axes -plt.rcParams["axes.prop_cycle"] = cycler(color=plt.get_cmap("tab10").colors) -# get the list of colors in the cycle -color_cycle = plt.rcParams["axes.prop_cycle"].by_key()["color"] - - -# frame_number = 0 # ATT! extracted frames are not consecutive! 
-# img = plt.imread(str(img_dir / f"{frame_number:05}.jpg")) - -# for id_idx, id_str in enumerate(ds["individuals"].data): -# # plot frame -# ax.imshow(img) - -# past_frames = [f for f in ds.time.data if f <= frame_number] -# future_frames = [f for f in ds.time.data if f > frame_number] - -# # plot past position of centroid in grey -# ax.scatter( -# x=ds.position.sel( -# individuals=id_str, time=past_frames, space="x" -# ).data, -# y=ds.position.sel( -# individuals=id_str, time=past_frames, space="y" -# ).data, -# s=1, -# color="grey", -# ) - -# # plot future trajectories of centroids in color -# ax.scatter( -# x=ds.position.sel( -# individuals=id_str, time=future_frames, space="x" -# ).data, -# y=ds.position.sel( -# individuals=id_str, time=future_frames, space="y" -# ).data, -# s=1, -# color=color_cycle[id_idx % len(color_cycle)], -# ) - -# # plot bbox in this frame -# # ATT! currently position is the top left corner of bbox -# # need to uncomment the line below if position loaded is centroid -# # (after fix) -# top_left_corner = ( -# ds.position.sel(individuals=id_str, time=frame_number).data -# - ds.shape.sel(individuals=id_str, time=frame_number).data / 2 -# ) -# bbox = plt.Rectangle( -# xy=tuple(top_left_corner), -# width=ds.shape.sel( -# individuals=id_str, time=frame_number, space="x" -# ).data, -# height=ds.shape.sel( -# individuals=id_str, time=frame_number, space="y" -# ).data, -# edgecolor=color_cycle[id_idx % len(color_cycle)], -# facecolor="none", # transparent fill -# linewidth=1.5, -# ) -# ax.add_patch(bbox) - - -# # ax.legend(ds["individuals"].data, bbox_to_anchor=(1.0, 1.0)) -# # ax.invert_yaxis() -# ax.set_aspect("equal") -# ax.set_xlabel("x (pixels)") -# ax.set_ylabel("y (pixels)") -# ax.set_title(f"MoCA {img_dir}, frame {frame_number}") -# plt.show() - -# %% diff --git a/examples/load_and_reindex_bboxes.py b/examples/load_and_reindex_bboxes.py new file mode 100644 index 00000000..c1798ff5 --- /dev/null +++ b/examples/load_and_reindex_bboxes.py 
@@ -0,0 +1,323 @@ +"""Load and reindex bounding boxes tracks +========================================== + +Load an example dataset of bounding boxes' tracks and reindex +it to every frame. +""" + +# %% +# Imports +# ------- + +# For interactive plots: install ipympl with `pip install ipympl` and uncomment +# the following line in your notebook +# %matplotlib widget +import csv +import math + +import sleap_io as sio +from matplotlib import pyplot as plt + +from movement import sample_data +from movement.filtering import interpolate_over_time +from movement.io import load_bboxes + +# %% +# Load sample dataset +# ------------------------ +# In this tutorial, we will use a sample bounding boxes dataset with +# a single individual (a crab). +# +# We will also download the associated video for visualising the data later. + +dataset_dict = sample_data.fetch_dataset_paths( + "VIA_single-crab_MOCA-crab-1.csv", + with_video=True, # download associated video +) + +file_path = dataset_dict["bboxes"] +print(file_path) + +ds = load_bboxes.from_via_tracks_file( + file_path, use_frame_numbers_from_file=True +) + +# %% +# The loaded dataset is made up of three data arrays: +# ``position``, ``shape``, and ``confidence``. +print(ds) + +# %% +# We can see the coordinates in the time dimension are expressed in frames, +# and that only 1 in 5 frames of the video are annotated, plus +# the last frame (167). +# +# In the following sections of the notebook we will explore options to reindex +# the dataset, and fill in the missing frames with reasonable values. +print(ds.time) + +# %% +# Inspect associated video +# -------------------------------- +# The video associated to the data contains all 168 frames. 
+ +video_path = dataset_dict["video"] + +video = sio.load_video(video_path) +n_frames, height, width, channels = video.shape + +print(f"Number of frames: {n_frames}") +print(f"Frame size: {width}x{height}") +print(f"Number of channels: {channels}") + + +# %% +# We can plot the data over the corresponding video frames to +# visualise the bounding boxes around the tracked crab. +# +# Let's focus on the first 15 frames of the video, and plot the annotated +# bounding box and centroid at each frame. The centroid at each frame is +# marked as a blue marker with a red ring. The past centroid positions are +# shown in blue and the future centroid positions in white. +# +# Note that in this case the camera is not static relative to the scene. + +# select indices of data to plot +data_start_idx = 0 +data_end_idx = 15 + +# initialise figure +fig = plt.figure(figsize=(15, 12)) + +# get list of colors for plotting +list_colors = plt.get_cmap("tab10").colors + +# loop over data and plot over corresponding frame +for p_i, data_idx in enumerate(range(data_start_idx, data_end_idx)): + # add subplot axes + ax = plt.subplot(math.ceil(data_end_idx / 5), 5, p_i + 1) + + # plot frame + # note: the video is indexed at every frame, so + # we use the frame number as index + ax.imshow(video[ds.time[data_idx].item()]) + + # plot box at this frame + top_left_corner = ( + ds.position[data_idx, 0, :].data - ds.shape[data_idx, 0, :].data / 2 + ) + bbox = plt.Rectangle( + xy=tuple(top_left_corner), + width=ds.shape[data_idx, 0, 0].data, # x coordinate of shape array + height=ds.shape[data_idx, 0, 1].data, # y coordinate of shape array + edgecolor=list_colors[0], + facecolor="none", + linewidth=1.5, + ) + ax.add_patch(bbox) + + # plot box's centroid at this frame with red ring + ax.scatter( + x=ds.position[data_idx, 0, 0].data, + y=ds.position[data_idx, 0, 1].data, + s=15, + color=list_colors[0], + edgecolors="red", + ) + + # plot past centroid positions in blue + ax.scatter( + 
x=ds.position[:data_idx, 0, 0].data, + y=ds.position[:data_idx, 0, 1].data, + s=5, + color=list_colors[0], + ) + + # plot future centroid positionsin white + ax.scatter( + x=ds.position[data_idx + 1 : data_end_idx, 0, 0].data, + y=ds.position[data_idx + 1 : data_end_idx, 0, 1].data, + s=5, + color="white", + ) + + ax.set_title(f"Frame {ds.time[data_idx].item()}") + ax.set_xlabel("x (pixles)") + ax.set_ylabel("y (pixels)") + ax.set_xlabel("") + +fig.tight_layout() + + +# %% +# Fill in empty values with forward filling +# ---------------------------------------------------- +# We can fill in the frames with missing values for the ``position`` and +# ``shape`` arrays by taking the last valid value in time. In this way, a +# box's position and shape stay constant if for a current frame the box +# has no annotation defined. + +ds_ff = ds.reindex( + {"time": list(range(ds.time[-1].item()))}, + method="ffill", # propagate last valid index value forward +) + +# check the first 14 frames of the data +print("Position data array (first 14 frames):") +print(ds_ff.position.data[:14, 0, :]) # time, individual, space + +print("----") +print("Shape data array (first 14 frames):") +print(ds_ff.shape.data[:14, 0, :]) # time, individual, space + +# %% +# Fill in empty values with NaN +# ---------------------------------------------------- +# Alternatively, we can fill in the missing frames with NaN values. +# This can be useful if we want to interpolate the missing values later. 
+ds_nan = ds.reindex( + {"time": list(range(ds.time[-1].item()))}, + method=None, # default +) + +# check the first 14 frames of the data +print("Position data array (first 14 frames):") +print(ds_nan.position.data[:14, 0, :]) + +print("----") +print("Shape data array (first 14 frames):") +print(ds_nan.shape.data[:14, 0, :]) + +# %% +# Linearly interpolate NaN values +# ---------------------------------------------------------- +# We can instead fill in the missing values in the dataset applying linear +# interpolation to the ``position`` and ``shape`` data arrays. In this way, +# we would be assuming that the centroid of the bounding box moves linearly +# between the two annotated values, and its width and height change linearly +# as well. +# +# We use the dataset with NaN values as an input to the +# ``interpolate_over_time`` function. +ds_interp = ds_nan.copy() + +for data_array_str in ["position", "shape"]: + ds_interp[data_array_str] = interpolate_over_time( + data=ds_interp[data_array_str], + method="linear", + max_gap=None, + print_report=False, + ) + +# check the first 14 frames of the data +print("Position data array (first 14 frames):") +print(ds_interp.position.data[:14, 0, :]) + +print("----") +print("Shape data array (first 14 frames):") +print(ds_interp.shape.data[:14, 0, :]) + + +# %% +# Compare interpolation methods +# ------------------------------ +# We can now qualitatively compare the three different methods of filling +# in the missing frames, by plotting the bounding boxes +# for the first 6 frames of the video. +# +# Remember only frames 0 and 5 are annotated in the original dataset. These +# are plotted in blue, while the forward filled values are plotted in orange +# and the linearly interpolated values in green. 
+ +# initialise figure +fig = plt.figure(figsize=(15, 12)) + +# loop over frames +for frame_n in range(5): + # add subplot axes + ax = plt.subplot(1, 6, frame_n + 1) + + # plot frame + # note: the video is indexed at every frame, so + # we use the frame number as index + ax.imshow(video[frame_n]) + + # plot bounding box for each dataset + for ds_i, ds_one in enumerate([ds_nan, ds_ff, ds_interp]): + # plot box + top_left_corner = ( + ds_one.position.sel(time=frame_n, individuals="id_1").data + - ds_one.shape.sel(time=frame_n, individuals="id_1").data / 2 + ) + bbox = plt.Rectangle( + xy=tuple(top_left_corner), + width=ds_one.shape.sel( + time=frame_n, individuals="id_1", space="x" + ).data, + height=ds_one.shape.sel( + time=frame_n, individuals="id_1", space="y" + ).data, + edgecolor=list_colors[ds_i], + facecolor="none", + # make line for NaN dataset thicker and dotted + linewidth=[5, 1.5, 1.5][ds_i], + linestyle=["dotted", "solid", "solid"][ds_i], + label=["nan", "ffill", "linear"][ds_i], + ) + ax.add_patch(bbox) + + # plot centroid + ax.scatter( + x=ds_one.position.sel( + time=frame_n, individuals="id_1", space="x" + ).data, + y=ds_one.position.sel( + time=frame_n, individuals="id_1", space="y" + ).data, + s=5, + color=list_colors[ds_i], + ) + + # add legend to first frame + if frame_n == 0: + ax.legend() + ax.set_title(f"Frame {frame_n}") + +fig.tight_layout() + +# %% +# Export as .csv file +# ------------------- +# Let's assume the dataset with the forward filled values is the best suited +# for our task - we can now export the computed values to a .csv file +# +# Note that we currently do not provide explicit methods to export a +# ``movement`` bounding boxes dataset in a specific format. However, we can +# easily save the bounding boxes’ trajectories to a .csv file using the +# standard Python library ``csv``. 
+ +# define name for output csv file +filepath = "tracking_output.csv" + +# open the csv file in write mode +with open(filepath, mode="w", newline="") as file: + writer = csv.writer(file) + + # write the header + writer.writerow( + ["frame_idx", "bbox_ID", "x", "y", "width", "height", "confidence"] + ) + + # write the data + for individual in ds.individuals.data: + for frame in ds.time.data: + x, y = ds.position.sel(time=frame, individuals=individual).data + width, height = ds.shape.sel( + time=frame, individuals=individual + ).data + confidence = ds.confidence.sel( + time=frame, individuals=individual + ).data + writer.writerow( + [frame, individual, x, y, width, height, confidence] + ) diff --git a/examples/reindex_and_interpolate_bboxes.py b/examples/reindex_and_interpolate_bboxes.py deleted file mode 100644 index cdddba8f..00000000 --- a/examples/reindex_and_interpolate_bboxes.py +++ /dev/null @@ -1,220 +0,0 @@ -"""Reindex and interpolate bounding boxes tracks -=============================== - -Load an example dataset of bounding boxes' tracks and reindex -it to every frame. -""" - -# %% -import math - -import sleap_io as sio -from cycler import cycler -from matplotlib import pyplot as plt - -from movement import sample_data -from movement.filtering import interpolate_over_time -from movement.io import load_bboxes - -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Select sample data file -# -------------------- -# For this example, we will use the path to one of -# the sample datasets provided with ``movement``. 
- -dataset_dict = sample_data.fetch_dataset_paths( - "VIA_single-crab_MOCA-crab-1.csv", - with_video=True, # for visualisation -) - -file_path = dataset_dict["bboxes"] -print(file_path) - -ds = load_bboxes.from_via_tracks_file( - file_path, use_frame_numbers_from_file=True -) - -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Only 1 in 5 frames are annotated, plus the last frame (167) -print(ds) -print("-----") -print(ds.time) - - -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Extend the dataset to every frame by forward filling -# The position and shape data arrays are filled with the last valid value -# So position and shape are kept constant when no annotation is available -ds_ff = ds.reindex( - {"time": list(range(ds.time[-1].item()))}, - method="ffill", # propagate last valid index value forward -) - -print("Position data array (first 14 frames):") -print(ds_ff.position.data[:14, 0, :]) # time, individual, space - -print("----") -print("Shape data array (first 14 frames):") -print(ds_ff.shape.data[:14, 0, :]) # time, individual, space - -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Extend the dataset to every frame and fill empty values with nan -ds_nan = ds.reindex( - {"time": list(range(ds.time[-1].item()))}, - method=None, # default -) - -print("Position data array (first 14 frames):") -print(ds_nan.position.data[:14, 0, :]) - -print("----") -print("Shape data array (first 14 frames):") -print(ds_nan.shape.data[:14, 0, :]) - -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Linearly interpolate position and shape with nan - -ds_interp = ds_nan.copy() - -for data_array_str in ["position", "shape"]: - ds_interp[data_array_str] = interpolate_over_time( - data=ds_interp[data_array_str], - method="linear", - max_gap=None, - print_report=False, - ) - -print("Position data array (first 14 frames):") -print(ds_interp.position.data[:14, 0, :]) - -print("----") -print("Shape data array (first 14 frames):") -print(ds_interp.shape.data[:14, 0, :]) - - -# 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Inspect associated video - -video_path = dataset_dict["video"] - - -video = sio.load_video(video_path) - -n_frames, height, width, channels = video.shape - -print(f"Number of frames: {n_frames}") # The video contains all frames -print(f"Frame size: {width}x{height}") -print(f"Number of channels: {channels}") - - -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Plot data -# OJO camera movement - -# select indices of data to plot -data_start_idx = 0 -data_end_idx = 11 - -# initialise figure -fig = plt.figure(figsize=(15, 12)) - -# add color cycler to axes -plt.rcParams["axes.prop_cycle"] = cycler(color=plt.get_cmap("tab10").colors) -color_cycle = plt.rcParams["axes.prop_cycle"].by_key()["color"] - -# loop over data and plot over corresponding frame -for p_i, data_idx in enumerate(range(data_start_idx, data_end_idx)): - # add subplot axes - ax = plt.subplot(math.ceil(data_end_idx / 5), 5, p_i + 1) - - # plot frame - ax.imshow( - video[ds.time[data_idx].item()] - ) # the video is indexed at every frame! 
use frame number as index - - # plot annotated boxes - top_left_corner = ( - ds.position[data_idx, 0, :].data - ds.shape[data_idx, 0, :].data / 2 - ) - bbox = plt.Rectangle( - xy=tuple(top_left_corner), - width=ds.shape[data_idx, 0, 0].data, # x coord - height=ds.shape[data_idx, 0, 1].data, # y coord of shape array - edgecolor=color_cycle[0], # [data_idx % len(color_cycle)], - facecolor="none", # transparent fill - linewidth=1.5, - ) - ax.add_patch(bbox) - - ax.set_title(f"Frame {ds.time[data_idx].item()}") - -fig.tight_layout() - -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Compare interpolation methods - -# select frames to inspect -frame_number_start = 0 -frame_number_end = 6 - -# add color cycler to axes -plt.rcParams["axes.prop_cycle"] = cycler(color=plt.get_cmap("tab10").colors) -color_cycle = plt.rcParams["axes.prop_cycle"].by_key()["color"] - - -# initialise figure -fig = plt.figure(figsize=(15, 12)) - - -# loop over data and plot over corresponding frame -for frame_n in range(frame_number_start, frame_number_end): - # add subplot axes - ax = plt.subplot(1, 6, frame_n + 1) - - # plot frame - ax.imshow(video[frame_n]) - # the video is indexed at every frame! 
use frame number as index - - # plot bounding box: box and centroid - for ds_i, ds in enumerate([ds_nan, ds_ff, ds_interp]): - # plot box - top_left_corner = ( - ds.position.sel(time=frame_n, individuals="id_1").data - - ds.shape.sel(time=frame_n, individuals="id_1").data / 2 - ) - bbox = plt.Rectangle( - xy=tuple(top_left_corner), - width=ds.shape.sel( - time=frame_n, individuals="id_1", space="x" - ).data, # x coord - height=ds.shape.sel( - time=frame_n, individuals="id_1", space="y" - ).data, # y coord of shape array - edgecolor=color_cycle[ds_i], - facecolor="none", # transparent fill - linewidth=[4.5, 1.5, 1.5][ds_i], - linestyle=["dotted", "solid", "solid"][ds_i], - label=["nan", "ffill", "linear"][ds_i], - ) - ax.add_patch(bbox) - - # plot centroid - ax.scatter( - x=ds.position.sel( - time=frame_n, individuals="id_1", space="x" - ).data, - y=ds.position.sel( - time=frame_n, individuals="id_1", space="y" - ).data, - s=5, - color=color_cycle[ds_i], - ) - - if frame_n == 0: - ax.legend() - ax.set_title(f"Frame {frame_n}") - -fig.tight_layout() - -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Export as csv file From 9cd39a52acf92312bac076d26d7968dbb0457fdf Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Mon, 2 Dec 2024 16:17:36 +0000 Subject: [PATCH 12/25] Fix to input/output snippet --- docs/source/user_guide/input_output.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/user_guide/input_output.md b/docs/source/user_guide/input_output.md index 6cc1f2a4..d887b98d 100644 --- a/docs/source/user_guide/input_output.md +++ b/docs/source/user_guide/input_output.md @@ -238,7 +238,7 @@ Here is an example of how you can save a bounding boxes dataset to a .csv file: ```python # define name for output csv file -file = 'tracking_output.csv" +filepath = "tracking_output.csv" # open the csv file in write mode with open(filepath, mode="w", newline="") as file: From 
49b85825e50ee13ee225d0bd8e46bee52e3bcf27 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Mon, 2 Dec 2024 17:36:31 +0000 Subject: [PATCH 13/25] Make figure sizes decent --- examples/load_and_reindex_bboxes.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/examples/load_and_reindex_bboxes.py b/examples/load_and_reindex_bboxes.py index c1798ff5..818c70b4 100644 --- a/examples/load_and_reindex_bboxes.py +++ b/examples/load_and_reindex_bboxes.py @@ -14,6 +14,7 @@ # %matplotlib widget import csv import math +import os import sleap_io as sio from matplotlib import pyplot as plt @@ -26,7 +27,8 @@ # Load sample dataset # ------------------------ # In this tutorial, we will use a sample bounding boxes dataset with -# a single individual (a crab). +# a single individual (a crab). The clip is part of the `Moving +# Camouflaged Animals Dataset (MoCA) dataset `_. # # We will also download the associated video for visualising the data later. @@ -49,11 +51,11 @@ # %% # We can see the coordinates in the time dimension are expressed in frames, -# and that only 1 in 5 frames of the video are annotated, plus +# and that we only have data for 1 in 5 frames of the video, plus # the last frame (167). # # In the following sections of the notebook we will explore options to reindex -# the dataset, and fill in the missing frames with reasonable values. +# the dataset and fill in values for the frames with missing data. 
print(ds.time) # %% @@ -87,7 +89,7 @@ data_end_idx = 15 # initialise figure -fig = plt.figure(figsize=(15, 12)) +fig = plt.figure(figsize=(8, 20)) # width, height # get list of colors for plotting list_colors = plt.get_cmap("tab10").colors @@ -95,7 +97,7 @@ # loop over data and plot over corresponding frame for p_i, data_idx in enumerate(range(data_start_idx, data_end_idx)): # add subplot axes - ax = plt.subplot(math.ceil(data_end_idx / 5), 5, p_i + 1) + ax = plt.subplot(math.ceil(data_end_idx / 2), 2, p_i + 1) # plot frame # note: the video is indexed at every frame, so @@ -230,12 +232,12 @@ # and the linearly interpolated values in green. # initialise figure -fig = plt.figure(figsize=(15, 12)) +fig = plt.figure(figsize=(8, 8)) # loop over frames -for frame_n in range(5): +for frame_n in range(6): # add subplot axes - ax = plt.subplot(1, 6, frame_n + 1) + ax = plt.subplot(3, 2, frame_n + 1) # plot frame # note: the video is indexed at every frame, so @@ -282,6 +284,8 @@ if frame_n == 0: ax.legend() ax.set_title(f"Frame {frame_n}") + ax.set_xlabel("x (pixels)") + ax.set_ylabel("y (pixels)") fig.tight_layout() @@ -321,3 +325,11 @@ writer.writerow( [frame, individual, x, y, width, height, confidence] ) + +# %% +# Remove the output file +# ---------------------- +# We can remove the output file we have just created. 
+# "nbsphinx": "hidden" + +os.remove(filepath) From b241cf9711eee32764e0acdd55b08f47c0be78e6 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Mon, 2 Dec 2024 17:54:39 +0000 Subject: [PATCH 14/25] Select thumbnail --- examples/load_and_reindex_bboxes.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/examples/load_and_reindex_bboxes.py b/examples/load_and_reindex_bboxes.py index 818c70b4..78a50318 100644 --- a/examples/load_and_reindex_bboxes.py +++ b/examples/load_and_reindex_bboxes.py @@ -231,6 +231,8 @@ # are plotted in blue, while the forward filled values are plotted in orange # and the linearly interpolated values in green. +# sphinx_gallery_thumbnail_number = 2 + # initialise figure fig = plt.figure(figsize=(8, 8)) @@ -329,7 +331,6 @@ # %% # Remove the output file # ---------------------- -# We can remove the output file we have just created. -# "nbsphinx": "hidden" - +# To remove the output file we have just created, we can run the following +# code. os.remove(filepath) From 7c5af56107eb806415df610a41c5dd5c882748d8 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 5 Dec 2024 11:04:08 +0000 Subject: [PATCH 15/25] Apply suggestions from code review Co-authored-by: Niko Sirmpilatze --- examples/load_and_reindex_bboxes.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/load_and_reindex_bboxes.py b/examples/load_and_reindex_bboxes.py index 78a50318..e6f020e3 100644 --- a/examples/load_and_reindex_bboxes.py +++ b/examples/load_and_reindex_bboxes.py @@ -50,12 +50,12 @@ print(ds) # %% -# We can see the coordinates in the time dimension are expressed in frames, +# We can see that coordinates in the time dimension are expressed in frames, # and that we only have data for 1 in 5 frames of the video, plus # the last frame (167). 
# -# In the following sections of the notebook we will explore options to reindex -# the dataset and fill in values for the frames with missing data. +# In the following sections of the notebook we will explore options to upsample +# the dataset by filling in values for video frames with no data. print(ds.time) # %% From 47f0c36c55efb814b334fe95603e096721a83c2b Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 5 Dec 2024 11:15:40 +0000 Subject: [PATCH 16/25] Reduce number of demo frames showed at the start --- examples/load_and_reindex_bboxes.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/examples/load_and_reindex_bboxes.py b/examples/load_and_reindex_bboxes.py index e6f020e3..dd928d95 100644 --- a/examples/load_and_reindex_bboxes.py +++ b/examples/load_and_reindex_bboxes.py @@ -77,19 +77,20 @@ # We can plot the data over the corresponding video frames to # visualise the bounding boxes around the tracked crab. # -# Let's focus on the first 15 frames of the video, and plot the annotated -# bounding box and centroid at each frame. The centroid at each frame is -# marked as a blue marker with a red ring. The past centroid positions are -# shown in blue and the future centroid positions in white. +# Let's inspect the first 6 frames of the video for which we have +# annotations, and plot the annotated bounding box and centroid at each frame. +# The centroid at each frame is marked as a blue marker with a red ring. +# The past centroid positions are shown in blue and the future centroid +# positions in white. # # Note that in this case the camera is not static relative to the scene. 
# select indices of data to plot data_start_idx = 0 -data_end_idx = 15 +data_end_idx = 6 # initialise figure -fig = plt.figure(figsize=(8, 20)) # width, height +fig = plt.figure(figsize=(8, 10)) # width, height # get list of colors for plotting list_colors = plt.get_cmap("tab10").colors From 9a8f74ed226aac0036879d36c5b6d7287227cdfb Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 5 Dec 2024 11:17:25 +0000 Subject: [PATCH 17/25] Rename last section --- examples/load_and_reindex_bboxes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/load_and_reindex_bboxes.py b/examples/load_and_reindex_bboxes.py index dd928d95..7ac00c2b 100644 --- a/examples/load_and_reindex_bboxes.py +++ b/examples/load_and_reindex_bboxes.py @@ -330,7 +330,7 @@ ) # %% -# Remove the output file +# Clean-up # ---------------------- # To remove the output file we have just created, we can run the following # code. From c449c106310d350a0d984ea473866af08098d047 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 5 Dec 2024 11:27:14 +0000 Subject: [PATCH 18/25] Change title and tagline --- ...oad_and_reindex_bboxes.py => load_and_upsample_bboxes.py} | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) rename examples/{load_and_reindex_bboxes.py => load_and_upsample_bboxes.py} (98%) diff --git a/examples/load_and_reindex_bboxes.py b/examples/load_and_upsample_bboxes.py similarity index 98% rename from examples/load_and_reindex_bboxes.py rename to examples/load_and_upsample_bboxes.py index 7ac00c2b..ff76c846 100644 --- a/examples/load_and_reindex_bboxes.py +++ b/examples/load_and_upsample_bboxes.py @@ -1,8 +1,7 @@ -"""Load and reindex bounding boxes tracks +"""Load and upsample bounding boxes tracks ========================================== -Load an example dataset of bounding boxes' tracks and reindex -it to every frame. 
+Load bounding boxes tracks and upsample them to match the video frame rate. """ # %% From 1cda92e44dc79e53c256ff6c0f209cc4a3d73376 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 5 Dec 2024 12:49:38 +0000 Subject: [PATCH 19/25] Add plots to check data imputation (rather than printing) and make colors consistent with the rest of the notebook. --- examples/load_and_upsample_bboxes.py | 220 ++++++++++++++++++++------- 1 file changed, 163 insertions(+), 57 deletions(-) diff --git a/examples/load_and_upsample_bboxes.py b/examples/load_and_upsample_bboxes.py index ff76c846..bdb289e6 100644 --- a/examples/load_and_upsample_bboxes.py +++ b/examples/load_and_upsample_bboxes.py @@ -27,7 +27,8 @@ # ------------------------ # In this tutorial, we will use a sample bounding boxes dataset with # a single individual (a crab). The clip is part of the `Moving -# Camouflaged Animals Dataset (MoCA) dataset `_. +# Camouflaged Animals Dataset (MoCA) dataset +# `_. # # We will also download the associated video for visualising the data later. @@ -49,13 +50,16 @@ print(ds) # %% -# We can see that coordinates in the time dimension are expressed in frames, -# and that we only have data for 1 in 5 frames of the video, plus -# the last frame (167). +# We can see that the coordinates in the time dimension are expressed in +# frames, and that we only have data for 1 in 5 frames of the video, plus +# the last frame (frame number 167). + +print(ds.time) + +# %% # # In the following sections of the notebook we will explore options to upsample -# the dataset by filling in values for video frames with no data. -print(ds.time) +# the dataset by filling in values for the video frames with no data. # %% # Inspect associated video @@ -73,26 +77,15 @@ # %% -# We can plot the data over the corresponding video frames to -# visualise the bounding boxes around the tracked crab. 
-# # Let's inspect the first 6 frames of the video for which we have # annotations, and plot the annotated bounding box and centroid at each frame. -# The centroid at each frame is marked as a blue marker with a red ring. -# The past centroid positions are shown in blue and the future centroid -# positions in white. -# -# Note that in this case the camera is not static relative to the scene. # select indices of data to plot data_start_idx = 0 data_end_idx = 6 # initialise figure -fig = plt.figure(figsize=(8, 10)) # width, height - -# get list of colors for plotting -list_colors = plt.get_cmap("tab10").colors +fig = plt.figure(figsize=(8, 8)) # width, height # loop over data and plot over corresponding frame for p_i, data_idx in enumerate(range(data_start_idx, data_end_idx)): @@ -112,9 +105,10 @@ xy=tuple(top_left_corner), width=ds.shape[data_idx, 0, 0].data, # x coordinate of shape array height=ds.shape[data_idx, 0, 1].data, # y coordinate of shape array - edgecolor=list_colors[0], + edgecolor="red", facecolor="none", linewidth=1.5, + label="current frame", ) ax.add_patch(bbox) @@ -123,33 +117,42 @@ x=ds.position[data_idx, 0, 0].data, y=ds.position[data_idx, 0, 1].data, s=15, - color=list_colors[0], - edgecolors="red", + color="red", ) # plot past centroid positions in blue - ax.scatter( - x=ds.position[:data_idx, 0, 0].data, - y=ds.position[:data_idx, 0, 1].data, - s=5, - color=list_colors[0], - ) + if data_idx > 0: + ax.scatter( + x=ds.position[0:data_idx, 0, 0].data, + y=ds.position[0:data_idx, 0, 1].data, + s=5, + color="tab:blue", + label="past frames", + ) - # plot future centroid positionsin white + # plot future centroid positions in white ax.scatter( x=ds.position[data_idx + 1 : data_end_idx, 0, 0].data, y=ds.position[data_idx + 1 : data_end_idx, 0, 1].data, s=5, color="white", + label="future frames", ) ax.set_title(f"Frame {ds.time[data_idx].item()}") ax.set_xlabel("x (pixles)") ax.set_ylabel("y (pixels)") ax.set_xlabel("") + if p_i == 1: + ax.legend() 
fig.tight_layout() +# %% +# +# The centroid at each frame is marked with a red marker. The past centroid +# positions are shown in blue and the future centroid positions in white. +# Note that in this case the camera is not static relative to the environment. # %% # Fill in empty values with forward filling @@ -164,13 +167,44 @@ method="ffill", # propagate last valid index value forward ) -# check the first 14 frames of the data -print("Position data array (first 14 frames):") -print(ds_ff.position.data[:14, 0, :]) # time, individual, space +# %% +# We can verify with a plot that the missing values have been filled in +# using the last valid value in time. + +# In the plot below, the original position and shape data is shown in black, +# while the forward-filled values are shown in blue. + +fig, axs = plt.subplots(2, 2, figsize=(8, 6)) +for row in range(axs.shape[0]): + space_coord = ["x", "y"][row] + for col in range(axs.shape[1]): + ax = axs[row, col] + data_array_str = ["position", "shape"][col] + # plot original data + ax.scatter( + x=ds.time, + y=ds[data_array_str].sel(individuals="id_1", space=space_coord), + marker="o", + color="black", + label="original data", + ) + # plot forward filled data + ax.plot( + ds_ff.time, + ds_ff[data_array_str].sel(individuals="id_1", space=space_coord), + marker=".", + linewidth=1, + color="tab:green", + label="upsampled data", + ) + ax.set_ylabel(f"{space_coord} (pixels)") + if row == 0: + ax.set_title(f"Bounding box {data_array_str}") + if col == 1: + ax.legend() + if row == 1: + ax.set_xlabel("time (frames)") -print("----") -print("Shape data array (first 14 frames):") -print(ds_ff.shape.data[:14, 0, :]) # time, individual, space # %% # Fill in empty values with NaN @@ -182,19 +216,54 @@ method=None, # default ) -# check the first 14 frames of the data -print("Position data array (first 14 frames):") -print(ds_nan.position.data[:14, 0, :]) +# %% +# Like before, we can verify with a plot that the missing values have been +# 
filled with NaN values. +fig, axs = plt.subplots(2, 2, figsize=(8, 6)) +for row in range(axs.shape[0]): + space_coord = ["x", "y"][row] + for col in range(axs.shape[1]): + ax = axs[row, col] + data_array_str = ["position", "shape"][col] + # plot original data + ax.scatter( + x=ds.time, + y=ds[data_array_str].sel(individuals="id_1", space=space_coord), + marker="o", + color="black", + label="original data", + ) + # plot NaN filled data + ax.plot( + ds_nan.time, + ds_nan[data_array_str].sel(individuals="id_1", space=space_coord), + marker=".", + linewidth=1, + color="tab:blue", + label="upsampled data", + ) + ax.set_ylabel(f"{space_coord} (pixels)") + if row == 0: + ax.set_title(f"Bounding box {data_array_str}") + if col == 1: + ax.legend() + if row == 1: + ax.set_xlabel("time (frames)") +# %% +# We can further confirm we have NaNs where expected by printing the first few +# frames of the data. +print("Position data array (first 10 frames):") +print(ds_nan.position.isel(time=slice(0, 10), individuals=0).data) print("----") -print("Shape data array (first 14 frames):") -print(ds_nan.shape.data[:14, 0, :]) +print("Shape data array (first 10 frames):") +print(ds_nan.shape.isel(time=slice(0, 10), individuals=0).data) # %% # Linearly interpolate NaN values # ---------------------------------------------------------- -# We can instead fill in the missing values in the dataset applying linear -# interpolation to the ``position`` and ``shape`` data arrays. In this way, +# We can instead fill in the missing values in the dataset by linearly +# interpolating the ``position`` and ``shape`` data arrays. In this way, # we would be assuming that the centroid of the bounding box moves linearly # between the two annotated values, and its width and height change linearly # as well. 
@@ -211,31 +280,66 @@ print_report=False, ) -# check the first 14 frames of the data -print("Position data array (first 14 frames):") -print(ds_interp.position.data[:14, 0, :]) - -print("----") -print("Shape data array (first 14 frames):") -print(ds_interp.shape.data[:14, 0, :]) +# %% +# Like before, we can visually check that the missing data has been imputed as +# expected by plotting the x and y coordinates of the position and shape arrays +# in time. + +fig, axs = plt.subplots(2, 2, figsize=(8, 6)) +for row in range(axs.shape[0]): + space_coord = ["x", "y"][row] + for col in range(axs.shape[1]): + ax = axs[row, col] + data_array_str = ["position", "shape"][col] + # plot original data + ax.scatter( + x=ds.time, + y=ds[data_array_str].sel(individuals="id_1", space=space_coord), + marker="o", + color="black", + label="original data", + ) + # plot linearly interpolated data + ax.plot( + ds_interp.time, + ds_interp[data_array_str].sel( + individuals="id_1", space=space_coord + ), + marker=".", + linewidth=1, + color="tab:orange", + label="upsampled data", + ) + ax.set_ylabel(f"{space_coord} (pixels)") + if row == 0: + ax.set_title(f"Bounding box {data_array_str}") + if col == 1: + ax.legend() + if row == 1: + ax.set_xlabel("time (frames)") +# %% +# The plot above shows that between the original data points (in black), +# the data is assumed to evolve linearly (in blue). # %% -# Compare interpolation methods -# ------------------------------ +# Compare methods +# ---------------- # We can now qualitatively compare the three different methods of filling # in the missing frames, by plotting the bounding boxes -# for the first 6 frames of the video. +# for the first few frames of the video. # -# Remember only frames 0 and 5 are annotated in the original dataset. These -# are plotted in blue, while the forward filled values are plotted in orange -# and the linearly interpolated values in green. 
+# Remember that not all frames of the video are annotated in the original +# dataset. The original data are plotted in black, while the forward filled +# values are plotted in orange and the linearly interpolated values in green. -# sphinx_gallery_thumbnail_number = 2 +# sphinx_gallery_thumbnail_number = 4 # initialise figure fig = plt.figure(figsize=(8, 8)) +list_colors = ["tab:blue", "tab:green", "tab:orange"] + # loop over frames for frame_n in range(6): # add subplot axes @@ -247,7 +351,9 @@ ax.imshow(video[frame_n]) # plot bounding box for each dataset - for ds_i, ds_one in enumerate([ds_nan, ds_ff, ds_interp]): + for ds_i, ds_one in enumerate( + [ds_nan, ds_ff, ds_interp] + ): # blue, green , orange # plot box top_left_corner = ( ds_one.position.sel(time=frame_n, individuals="id_1").data @@ -297,7 +403,7 @@ # Let's assume the dataset with the forward filled values is the best suited # for our task - we can now export the computed values to a .csv file # -# Note that we currently do not provide explicit methods to export a +# Note that currently we do not provide explicit methods to export a # ``movement`` bounding boxes dataset in a specific format. However, we can # easily save the bounding boxes’ trajectories to a .csv file using the # standard Python library ``csv``. 
From 8f34dbcdb973397c3c132a89cdfaf8e4cfb4e629 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 5 Dec 2024 13:05:56 +0000 Subject: [PATCH 20/25] Change numpy-style indexing to sel when looping thru video frames in first plot --- examples/load_and_upsample_bboxes.py | 47 +++++++++++++++------------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/examples/load_and_upsample_bboxes.py b/examples/load_and_upsample_bboxes.py index bdb289e6..ce5d8496 100644 --- a/examples/load_and_upsample_bboxes.py +++ b/examples/load_and_upsample_bboxes.py @@ -80,31 +80,32 @@ # Let's inspect the first 6 frames of the video for which we have # annotations, and plot the annotated bounding box and centroid at each frame. -# select indices of data to plot -data_start_idx = 0 -data_end_idx = 6 +# set last frame to plot +end_frame_idx = 25 +# create list of frames to loop over with step=5 +list_frames = list(range(0, end_frame_idx + 1, 5)) # initialise figure fig = plt.figure(figsize=(8, 8)) # width, height -# loop over data and plot over corresponding frame -for p_i, data_idx in enumerate(range(data_start_idx, data_end_idx)): +# loop over selected frames and plot the data +for i, frame_idx in enumerate(list_frames): # add subplot axes - ax = plt.subplot(math.ceil(data_end_idx / 2), 2, p_i + 1) + ax = plt.subplot(math.ceil(len(list_frames) / 2), 2, i + 1) # plot frame - # note: the video is indexed at every frame, so - # we use the frame number as index - ax.imshow(video[ds.time[data_idx].item()]) + ax.imshow(video[frame_idx]) # plot box at this frame top_left_corner = ( - ds.position[data_idx, 0, :].data - ds.shape[data_idx, 0, :].data / 2 - ) + ds.position.sel(time=frame_idx).data + - ds.shape.sel(time=frame_idx).data / 2 + ).squeeze() + bbox = plt.Rectangle( xy=tuple(top_left_corner), - width=ds.shape[data_idx, 0, 0].data, # x coordinate of shape array - height=ds.shape[data_idx, 0, 1].data, # y coordinate of shape array + 
width=ds.shape.sel(time=frame_idx, space="x").item(), + height=ds.shape.sel(time=frame_idx, space="y").item(), edgecolor="red", facecolor="none", linewidth=1.5, @@ -114,17 +115,17 @@ # plot box's centroid at this frame with red ring ax.scatter( - x=ds.position[data_idx, 0, 0].data, - y=ds.position[data_idx, 0, 1].data, + x=ds.position.sel(time=frame_idx, space="x"), + y=ds.position.sel(time=frame_idx, space="y"), s=15, color="red", ) # plot past centroid positions in blue - if data_idx > 0: + if frame_idx > 0: ax.scatter( - x=ds.position[0:data_idx, 0, 0].data, - y=ds.position[0:data_idx, 0, 1].data, + x=ds.position.sel(time=slice(0, frame_idx - 1), space="x"), + y=ds.position.sel(time=slice(0, frame_idx - 1), space="y"), s=5, color="tab:blue", label="past frames", @@ -132,23 +133,25 @@ # plot future centroid positions in white ax.scatter( - x=ds.position[data_idx + 1 : data_end_idx, 0, 0].data, - y=ds.position[data_idx + 1 : data_end_idx, 0, 1].data, + x=ds.position.sel(time=slice(frame_idx + 1, end_frame_idx), space="x"), + y=ds.position.sel(time=slice(frame_idx + 1, end_frame_idx), space="y"), s=5, color="white", label="future frames", ) - ax.set_title(f"Frame {ds.time[data_idx].item()}") + ax.set_title(f"Frame {frame_idx}") ax.set_xlabel("x (pixles)") ax.set_ylabel("y (pixels)") ax.set_xlabel("") - if p_i == 1: + if frame_idx == 1: ax.legend() fig.tight_layout() # %% +# We used ``xarray``'s ``.sel()`` method to select the data for the +# relevant frames directly. # # The centroid at each frame is marked with a red marker. The past centroid # positions are shown in blue and the future centroid positions in white. 
From 3bce7901a9d08edaa41ad50b0f5cd1b0ea2d7b46 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 5 Dec 2024 16:49:09 +0000 Subject: [PATCH 21/25] Replace empty by missing --- examples/load_and_upsample_bboxes.py | 262 +++++++++++---------------- 1 file changed, 105 insertions(+), 157 deletions(-) diff --git a/examples/load_and_upsample_bboxes.py b/examples/load_and_upsample_bboxes.py index ce5d8496..42971959 100644 --- a/examples/load_and_upsample_bboxes.py +++ b/examples/load_and_upsample_bboxes.py @@ -57,7 +57,6 @@ print(ds.time) # %% -# # In the following sections of the notebook we will explore options to upsample # the dataset by filling in values for the video frames with no data. @@ -80,6 +79,8 @@ # Let's inspect the first 6 frames of the video for which we have # annotations, and plot the annotated bounding box and centroid at each frame. +# sphinx_gallery_thumbnail_number = 1 + # set last frame to plot end_frame_idx = 25 # create list of frames to loop over with step=5 @@ -126,7 +127,7 @@ ax.scatter( x=ds.position.sel(time=slice(0, frame_idx - 1), space="x"), y=ds.position.sel(time=slice(0, frame_idx - 1), space="y"), - s=5, + s=10, color="tab:blue", label="past frames", ) @@ -135,11 +136,12 @@ ax.scatter( x=ds.position.sel(time=slice(frame_idx + 1, end_frame_idx), space="x"), y=ds.position.sel(time=slice(frame_idx + 1, end_frame_idx), space="y"), - s=5, + s=10, color="white", label="future frames", ) + # set title and labels ax.set_title(f"Frame {frame_idx}") ax.set_xlabel("x (pixles)") ax.set_ylabel("y (pixels)") @@ -160,7 +162,7 @@ # %% # Fill in empty values with forward filling # ---------------------------------------------------- -# We can fill in the frames with missing values for the ``position`` and +# We can fill in the frames with empty values for the ``position`` and # ``shape`` arrays by taking the last valid value in time. 
In this way, a # box's position and shape stay constant if for a current frame the box # has no annotation defined. @@ -171,87 +173,81 @@ ) # %% -# We can verify with a plot that the missing values have been filled in +# We can verify with a plot that the empty values have been filled in # using the last valid value in time. -# In the plot below, the original position and shape data is shown in black, -# while the forward-filled values are shown in blue. +# %% +# In the plot below, the original ``position`` and ``shape`` data is shown +# in black, while the forward-filled values are shown in green. + + +# We define a convenience function to plot the ``position`` and ``shape`` +# space coordinates for the input dataset and a filled one. +def plot_position_and_shape_xy_coords(ds_input_data, ds_filled, color_filled): + """Compare the x and y coordinates of the position and shape arrays in time + for the input and filled datasets. + """ + fig, axs = plt.subplots(2, 2, figsize=(8, 6)) + for row in range(axs.shape[0]): + space_coord = ["x", "y"][row] + for col in range(axs.shape[1]): + ax = axs[row, col] + data_array_str = ["position", "shape"][col] + + # plot original data + ax.scatter( + x=ds_input_data.time, + y=ds_input_data[data_array_str].sel( + individuals="id_1", space=space_coord + ), + marker="o", + color="black", + label="original data", + ) -fig, axs = plt.subplots(2, 2, figsize=(8, 6)) -for row in range(axs.shape[0]): - space_coord = ["x", "y"][row] - for col in range(axs.shape[1]): - ax = axs[row, col] - data_array_str = ["position", "shape"][col] - # plot original data - ax.scatter( - x=ds.time, - y=ds[data_array_str].sel(individuals="id_1", space=space_coord), - marker="o", - color="black", - label="original data", - ) - # plot forward filled data - ax.plot( - ds_ff.time, - ds_ff[data_array_str].sel(individuals="id_1", space=space_coord), - marker=".", - linewidth=1, - color="tab:green", - label="upsampled data", - ) - ax.set_ylabel(f"{space_coord} 
(pixels)") - if row == 0: - ax.set_title(f"Bounding box {data_array_str}") - if col == 1: - ax.legend() - if row == 1: - ax.set_xlabel("time (frames)") + # plot forward filled data + ax.plot( + ds_filled.time, + ds_filled[data_array_str].sel( + individuals="id_1", space=space_coord + ), + marker=".", + linewidth=1, + color=color_filled, + label="upsampled data", + ) + + # set axes labels and legend + ax.set_ylabel(f"{space_coord} (pixels)") + if row == 0: + ax.set_title(f"Bounding box {data_array_str}") + if col == 1: + ax.legend() + if row == 1: + ax.set_xlabel("time (frames)") +# plot +plot_position_and_shape_xy_coords( + ds, ds_filled=ds_ff, color_filled="tab:green" +) + # %% # Fill in empty values with NaN # ---------------------------------------------------- -# Alternatively, we can fill in the missing frames with NaN values. -# This can be useful if we want to interpolate the missing values later. +# Alternatively, we can fill in the empty frames with NaN values. +# This can be useful if we want to interpolate later. ds_nan = ds.reindex( {"time": list(range(ds.time[-1].item()))}, method=None, # default ) # %% -# Like before, we can verify with a plot that the missing values have been +# Like before, we can verify with a plot that the empty values have been # filled with NaN values. 
-fig, axs = plt.subplots(2, 2, figsize=(8, 6)) -for row in range(axs.shape[0]): - space_coord = ["x", "y"][row] - for col in range(axs.shape[1]): - ax = axs[row, col] - data_array_str = ["position", "shape"][col] - # plot original data - ax.scatter( - x=ds.time, - y=ds[data_array_str].sel(individuals="id_1", space=space_coord), - marker="o", - color="black", - label="original data", - ) - # plot NaN filled data - ax.plot( - ds_nan.time, - ds_nan[data_array_str].sel(individuals="id_1", space=space_coord), - marker=".", - linewidth=1, - color="tab:blue", - label="upsampled data", - ) - ax.set_ylabel(f"{space_coord} (pixels)") - if row == 0: - ax.set_title(f"Bounding box {data_array_str}") - if col == 1: - ax.legend() - if row == 1: - ax.set_xlabel("time (frames)") +plot_position_and_shape_xy_coords( + ds, ds_filled=ds_nan, color_filled="tab:blue" +) # %% # We can further confirm we have NaNs where expected by printing the first few @@ -265,7 +261,7 @@ # %% # Linearly interpolate NaN values # ---------------------------------------------------------- -# We can instead fill in the missing values in the dataset by linearly +# We can instead fill in the empty values in the dataset by linearly # interpolating the ``position`` and ``shape`` data arrays. In this way, # we would be assuming that the centroid of the bounding box moves linearly # between the two annotated values, and its width and height change linearly @@ -284,59 +280,29 @@ ) # %% -# Like before, we can visually check that the missing data has been imputed as -# expected by plotting the x and y coordinates of the position and shape arrays +# Like before, we can visually check that the empty data has been imputed as +# expected by plotting the x and y coordinates of the ``position`` +# and ``shape`` arrays # in time. 
-fig, axs = plt.subplots(2, 2, figsize=(8, 6)) -for row in range(axs.shape[0]): - space_coord = ["x", "y"][row] - for col in range(axs.shape[1]): - ax = axs[row, col] - data_array_str = ["position", "shape"][col] - # plot original data - ax.scatter( - x=ds.time, - y=ds[data_array_str].sel(individuals="id_1", space=space_coord), - marker="o", - color="black", - label="original data", - ) - # plot linearly interpolated data - ax.plot( - ds_interp.time, - ds_interp[data_array_str].sel( - individuals="id_1", space=space_coord - ), - marker=".", - linewidth=1, - color="tab:orange", - label="upsampled data", - ) - ax.set_ylabel(f"{space_coord} (pixels)") - if row == 0: - ax.set_title(f"Bounding box {data_array_str}") - if col == 1: - ax.legend() - if row == 1: - ax.set_xlabel("time (frames)") +plot_position_and_shape_xy_coords( + ds, ds_filled=ds_interp, color_filled="tab:orange" +) # %% # The plot above shows that between the original data points (in black), -# the data is assumed to evolve linearly (in blue). +# the data is assumed to evolve linearly (in orange). # %% # Compare methods # ---------------- -# We can now qualitatively compare the three different methods of filling -# in the missing frames, by plotting the bounding boxes -# for the first few frames of the video. +# We can now qualitatively compare the bounding boxes computed +# with the three different filling methods we have seen: forward filling, +# NaN filling and linear interpolation. # -# Remember that not all frames of the video are annotated in the original -# dataset. The original data are plotted in black, while the forward filled -# values are plotted in orange and the linearly interpolated values in green. - -# sphinx_gallery_thumbnail_number = 4 +# In the plot below, the NaN-filled data are plotted in blue, the forward +# filled values are plotted in green, and the linearly interpolated values +# are shown in orange.
# initialise figure fig = plt.figure(figsize=(8, 8)) @@ -344,59 +310,48 @@ list_colors = ["tab:blue", "tab:green", "tab:orange"] # loop over frames -for frame_n in range(6): +for frame_idx in range(6): # add subplot axes - ax = plt.subplot(3, 2, frame_n + 1) + ax = plt.subplot(3, 2, frame_idx + 1) # plot frame - # note: the video is indexed at every frame, so - # we use the frame number as index - ax.imshow(video[frame_n]) + ax.imshow(video[frame_idx]) # plot bounding box for each dataset - for ds_i, ds_one in enumerate( - [ds_nan, ds_ff, ds_interp] - ): # blue, green , orange + for ds_i, ds_filled in enumerate([ds_nan, ds_ff, ds_interp]): # plot box top_left_corner = ( - ds_one.position.sel(time=frame_n, individuals="id_1").data - - ds_one.shape.sel(time=frame_n, individuals="id_1").data / 2 - ) + ds_filled.position.sel(time=frame_idx).data + - ds_filled.shape.sel(time=frame_idx).data / 2 + ).squeeze() + bbox = plt.Rectangle( xy=tuple(top_left_corner), - width=ds_one.shape.sel( - time=frame_n, individuals="id_1", space="x" - ).data, - height=ds_one.shape.sel( - time=frame_n, individuals="id_1", space="y" - ).data, + width=ds_filled.shape.sel(time=frame_idx, space="x").item(), + height=ds_filled.shape.sel(time=frame_idx, space="y").item(), edgecolor=list_colors[ds_i], facecolor="none", # make line for NaN dataset thicker and dotted - linewidth=[5, 1.5, 1.5][ds_i], - linestyle=["dotted", "solid", "solid"][ds_i], label=["nan", "ffill", "linear"][ds_i], + linewidth=[8, 2.5, 2.5][ds_i], + linestyle=["dotted", "solid", "solid"][ds_i], ) ax.add_patch(bbox) # plot centroid ax.scatter( - x=ds_one.position.sel( - time=frame_n, individuals="id_1", space="x" - ).data, - y=ds_one.position.sel( - time=frame_n, individuals="id_1", space="y" - ).data, - s=5, + x=ds_filled.position.sel(time=frame_idx, space="x"), + y=ds_filled.position.sel(time=frame_idx, space="y"), + s=20, color=list_colors[ds_i], ) - # add legend to first frame - if frame_n == 0: - ax.legend() - 
ax.set_title(f"Frame {frame_n}") + # set title and labels + ax.set_title(f"Frame {frame_idx}") ax.set_xlabel("x (pixels)") ax.set_ylabel("y (pixels)") + if frame_idx == 0: + ax.legend() fig.tight_layout() @@ -419,23 +374,16 @@ writer = csv.writer(file) # write the header - writer.writerow( - ["frame_idx", "bbox_ID", "x", "y", "width", "height", "confidence"] - ) + writer.writerow(["frame", "ID", "x", "y", "width", "height"]) # write the data - for individual in ds.individuals.data: - for frame in ds.time.data: - x, y = ds.position.sel(time=frame, individuals=individual).data - width, height = ds.shape.sel( - time=frame, individuals=individual - ).data - confidence = ds.confidence.sel( + for individual in ds_ff.individuals.data: + for frame in ds_ff.time.data: + x, y = ds_ff.position.sel(time=frame, individuals=individual).data + width, height = ds_ff.shape.sel( time=frame, individuals=individual ).data - writer.writerow( - [frame, individual, x, y, width, height, confidence] - ) + writer.writerow([frame, individual, x, y, width, height]) # %% # Clean-up From 4cc2d511e85409caff974ac8546932a8fd1cc165 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 5 Dec 2024 16:54:20 +0000 Subject: [PATCH 22/25] Fix text cell --- examples/load_and_upsample_bboxes.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/examples/load_and_upsample_bboxes.py b/examples/load_and_upsample_bboxes.py index 42971959..575a6b34 100644 --- a/examples/load_and_upsample_bboxes.py +++ b/examples/load_and_upsample_bboxes.py @@ -175,14 +175,12 @@ # %% # We can verify with a plot that the empty values have been filled in # using the last valid value in time. - -# %% -# In the plot below, the original ``position`` and ``shape`` data is shown -# in black, while the forward-filled values are shown in green. 
+# +# For this we define a convenience function to plot the x and y coordinates +# of the ``position`` and ``shape`` arrays, for the input dataset and for +# a filled one. -# We define a convenience function to plot the ``position`` and ``shape`` -# space coordinates for the input dataset and a filled one. def plot_position_and_shape_xy_coords(ds_input_data, ds_filled, color_filled): """Compare the x and y coordinates of the position and shape arrays in time for the input and filled datasets. @@ -227,7 +225,11 @@ def plot_position_and_shape_xy_coords(ds_input_data, ds_filled, color_filled): ax.set_xlabel("time (frames)") -# plot +# %% +# In the plot below, the original ``position`` and ``shape`` data is shown +# in black, while the forward-filled values are shown in green. + + plot_position_and_shape_xy_coords( ds, ds_filled=ds_ff, color_filled="tab:green" ) From e5c2ca37fc6f83c62caa8fb5254c485afe82f540 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 5 Dec 2024 16:56:06 +0000 Subject: [PATCH 23/25] Fix one sonarcloud issue --- examples/load_and_upsample_bboxes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/load_and_upsample_bboxes.py b/examples/load_and_upsample_bboxes.py index 575a6b34..022a0f52 100644 --- a/examples/load_and_upsample_bboxes.py +++ b/examples/load_and_upsample_bboxes.py @@ -185,7 +185,7 @@ def plot_position_and_shape_xy_coords(ds_input_data, ds_filled, color_filled): """Compare the x and y coordinates of the position and shape arrays in time for the input and filled datasets. 
""" - fig, axs = plt.subplots(2, 2, figsize=(8, 6)) + _, axs = plt.subplots(2, 2, figsize=(8, 6)) for row in range(axs.shape[0]): space_coord = ["x", "y"][row] for col in range(axs.shape[1]): From 6e2cf1fc511c9a4e5d34837ea7d1cbf2fe149834 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Fri, 6 Dec 2024 11:44:48 +0000 Subject: [PATCH 24/25] Add tight_layout --- examples/load_and_upsample_bboxes.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/load_and_upsample_bboxes.py b/examples/load_and_upsample_bboxes.py index 022a0f52..7b312294 100644 --- a/examples/load_and_upsample_bboxes.py +++ b/examples/load_and_upsample_bboxes.py @@ -185,7 +185,7 @@ def plot_position_and_shape_xy_coords(ds_input_data, ds_filled, color_filled): """Compare the x and y coordinates of the position and shape arrays in time for the input and filled datasets. """ - _, axs = plt.subplots(2, 2, figsize=(8, 6)) + fig, axs = plt.subplots(2, 2, figsize=(8, 6)) for row in range(axs.shape[0]): space_coord = ["x", "y"][row] for col in range(axs.shape[1]): @@ -224,6 +224,8 @@ def plot_position_and_shape_xy_coords(ds_input_data, ds_filled, color_filled): if row == 1: ax.set_xlabel("time (frames)") + fig.tight_layout() + # %% # In the plot below, the original ``position`` and ``shape`` data is shown From 9584634f1a029cbae215cef1719b1c3994d23f35 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Fri, 6 Dec 2024 12:01:43 +0000 Subject: [PATCH 25/25] Fix missing last timestep --- examples/load_and_upsample_bboxes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/load_and_upsample_bboxes.py b/examples/load_and_upsample_bboxes.py index 7b312294..cfb1bba3 100644 --- a/examples/load_and_upsample_bboxes.py +++ b/examples/load_and_upsample_bboxes.py @@ -168,7 +168,7 @@ # has no annotation defined. 
ds_ff = ds.reindex( - {"time": list(range(ds.time[-1].item()))}, + {"time": list(range(ds.time[-1].item() + 1))}, method="ffill", # propagate last valid index value forward ) @@ -242,7 +242,7 @@ def plot_position_and_shape_xy_coords(ds_input_data, ds_filled, color_filled): # Alternatively, we can fill in the empty frames with NaN values. # This can be useful if we want to interpolate later. ds_nan = ds.reindex( - {"time": list(range(ds.time[-1].item()))}, + {"time": list(range(ds.time[-1].item() + 1))}, method=None, # default )