Commit

Update docstrings + code comments

lochhh committed Dec 5, 2024
1 parent 5aabdbd commit c95dc22
Showing 12 changed files with 73 additions and 118 deletions.
3 changes: 2 additions & 1 deletion docs/source/conf.py
@@ -110,7 +110,7 @@
"dependencies": ["environment.yml"],
},
"reference_url": {"movement": None},
"default_thumb_file": "source/_static/data_icon.png", # default thumbnail image
"default_thumb_file": "source/_static/data_icon.png", # default thumbnail image
"remove_config_comments": True,
# do not render config params set as # sphinx_gallery_config [= value]
}
@@ -206,6 +206,7 @@
intersphinx_mapping = {
"xarray": ("https://docs.xarray.dev/en/stable/", None),
"scipy": ("https://docs.scipy.org/doc/scipy/reference/", None),
"pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None),
}


4 changes: 2 additions & 2 deletions docs/source/user_guide/input_output.md
@@ -93,7 +93,7 @@ with three keypoints each: ``snout``, ``centre``, and ``tail_base``. These keypo
import numpy as np

ds = load_poses.from_numpy(
position_array=np.random.rand((100, 2, 3, 2)),
position_array=np.random.rand(100, 2, 3, 2),
confidence_array=np.ones((100, 2, 3)),
individual_names=["Alice", "Bob"],
keypoint_names=["snout", "centre", "tail_base"],
@@ -256,7 +256,7 @@ with open(filepath, mode="w", newline="") as file:
writer.writerow([frame, individual, x, y, width, height, confidence])

```
Alternatively, we can convert the `movement` bounding boxes' dataset to a pandas DataFrame with the {func}`.xarray.DataArray.to_dataframe()` method, wrangle the dataframe as required, and then apply the {func}`.pandas.DataFrame.to_csv()` method to save the data as a .csv file.
Alternatively, we can convert the `movement` bounding boxes' dataset to a pandas DataFrame with the {meth}`xarray.DataArray.to_dataframe` method, wrangle the dataframe as required, and then apply the {meth}`pandas.DataFrame.to_csv` method to save the data as a .csv file.
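For instance, a minimal sketch along these lines (assuming `ds` is a bounding boxes dataset already loaded with `load_bboxes`, and with a made-up output filename) could be:

```python
# Sketch: export the bounding boxes' centroid positions to a .csv file via pandas.
# Assumes `ds` is a movement bounding boxes dataset (e.g. loaded with load_bboxes).
df = ds.position.to_dataframe().reset_index()

# Pivot the "space" dimension into separate "x" and "y" columns
df = df.pivot_table(
    index=["time", "individuals"], columns="space", values="position"
).reset_index()

# Save the wrangled DataFrame to a .csv file
df.to_csv("bbox_centroids.csv", index=False)
```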


(target-sample-data)=
38 changes: 19 additions & 19 deletions docs/source/user_guide/movement_dataset.md
@@ -43,15 +43,15 @@ print(ds)
and we would obtain an output such as:
```
<xarray.Dataset> Size: 27kB
Dimensions: (time: 601, individuals: 3, keypoints: 1, space: 2)
Dimensions: (time: 601, space: 2, keypoints: 1, individuals: 3)
Coordinates:
* time (time) float64 5kB 0.0 0.02 0.04 0.06 ... 11.96 11.98 12.0
* individuals (individuals) <U10 120B 'AEON3B_NTP' 'AEON3B_TP1' 'AEON3B_TP2'
* keypoints (keypoints) <U8 32B 'centroid'
* space (space) <U1 8B 'x' 'y'
* keypoints (keypoints) <U8 32B 'centroid'
* individuals (individuals) <U10 120B 'AEON3B_NTP' 'AEON3B_TP1' 'AEON3B_TP2'
Data variables:
position (time, individuals, keypoints, space) float32 14kB 770.3 ......
confidence (time, individuals, keypoints) float32 7kB nan nan ... nan nan
position (time, space, keypoints, individuals) float32 14kB 770.3 ......
confidence (time, keypoints, individuals) float32 7kB nan nan ... nan nan
Attributes:
fps: 50.0
time_unit: seconds
@@ -78,14 +78,14 @@
and the last command would print out:
```
<xarray.Dataset> Size: 19kB
Dimensions: (time: 5, individuals: 86, space: 2)
Dimensions: (time: 5, space: 2, individuals: 86)
Coordinates:
* time (time) int64 40B 0 1 2 3 4
* individuals (individuals) <U5 2kB 'id_1' 'id_2' 'id_3' ... 'id_89' 'id_90'
* space (space) <U1 8B 'x' 'y'
* individuals (individuals) <U5 2kB 'id_1' 'id_2' 'id_3' ... 'id_89' 'id_90'
Data variables:
position (time, individuals, space) float64 7kB 871.8 ... 905.3
shape (time, individuals, space) float64 7kB 60.0 53.0 ... 51.0 36.0
position (time, space, individuals) float64 7kB 901.8 ... 923.3
shape (time, space, individuals) float64 7kB 60.0 30.0 ... 72.0 36.0
confidence (time, individuals) float64 3kB nan nan nan nan ... nan nan nan
Attributes:
fps: None
@@ -114,25 +114,25 @@ the labelled "ticks" along each axis are called **coordinates** (`coords`).
:::{tab-item} Poses dataset
A `movement` poses dataset has the following **dimensions**:
- `time`, with size equal to the number of frames in the video.
- `individuals`, with size equal to the number of tracked individuals/instances.
- `keypoints`, with size equal to the number of tracked keypoints per individual.
- `space`, which is the number of spatial dimensions. Currently, we support only 2D poses.
- `keypoints`, with size equal to the number of tracked keypoints per individual.
- `individuals`, with size equal to the number of tracked individuals/instances.
:::

:::{tab-item} Bounding boxes' dataset
A `movement` bounding boxes dataset has the following **dimensions**:
- `time`, with size equal to the number of frames in the video.
- `individuals`, with size equal to the number of tracked individuals/instances.
- `space`, which is the number of spatial dimensions. Currently, we support only 2D bounding boxes data.
- `individuals`, with size equal to the number of tracked individuals/instances.
Notice that these are the same dimensions as for a poses dataset, except for the `keypoints` dimension.
:::
::::

In both cases, appropriate **coordinates** are assigned to each **dimension**.
- `individuals` are labelled with a list of unique names (e.g. `mouse1`, `mouse2`, etc. or `id_0`, `id_1`, etc.).
- `keypoints` are likewise labelled with a list of unique body part names, e.g. `snout`, `right_ear`, etc. Note that this dimension only exists in the poses dataset.
- `space` is labelled with either `x`, `y` (2D) or `x`, `y`, `z` (3D). Note that bounding boxes datasets are restricted to 2D space.
- `time` is labelled in seconds if `fps` is provided, otherwise the **coordinates** are expressed in frames (ascending 0-indexed integers).
- `space` is labelled with either `x`, `y` (2D) or `x`, `y`, `z` (3D). Note that bounding boxes datasets are restricted to 2D space.
- `keypoints` are likewise labelled with a list of unique body part names, e.g. `snout`, `right_ear`, etc. Note that this dimension only exists in the poses dataset.
- `individuals` are labelled with a list of unique names (e.g. `mouse1`, `mouse2`, etc. or `id_0`, `id_1`, etc.).
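
As a quick illustration, the dimensions and coordinates of a loaded dataset `ds` can be inspected with standard xarray accessors (a sketch based on the example poses dataset printed above):

```python
# Sketch: inspect the dimensions and coordinates of a movement dataset `ds`
print(ds.sizes)
# e.g. Frozen({'time': 601, 'space': 2, 'keypoints': 1, 'individuals': 3})
print(ds.coords["space"].values)        # ['x' 'y']
print(ds.coords["individuals"].values)  # e.g. ['AEON3B_NTP' 'AEON3B_TP1' 'AEON3B_TP2']
```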

:::{dropdown} Additional dimensions
:color: info
Expand All @@ -156,14 +156,14 @@ The specific data variables stored are slightly different between a `movement` p
::::{tab-set}
:::{tab-item} Poses dataset
A `movement` poses dataset contains two **data variables**:
- `position`: the 2D or 3D locations of the keypoints over time, with shape (`time`, `individuals`, `keypoints`, `space`).
- `confidence`: the confidence scores associated with each predicted keypoint (as reported by the pose estimation model), with shape (`time`, `individuals`, `keypoints`).
- `position`: the 2D or 3D locations of the keypoints over time, with shape (`time`, `space`, `keypoints`, `individuals`).
- `confidence`: the confidence scores associated with each predicted keypoint (as reported by the pose estimation model), with shape (`time`, `keypoints`, `individuals`).
:::

:::{tab-item} Bounding boxes' dataset
A `movement` bounding boxes dataset contains three **data variables**:
- `position`: the 2D locations of the bounding boxes' centroids over time, with shape (`time`, `individuals`, `space`).
- `shape`: the width and height of the bounding boxes over time, with shape (`time`, `individuals`, `space`).
- `position`: the 2D locations of the bounding boxes' centroids over time, with shape (`time`, `space`, `individuals`).
- `shape`: the width and height of the bounding boxes over time, with shape (`time`, `space`, `individuals`).
- `confidence`: the confidence scores associated with each predicted bounding box, with shape (`time`, `individuals`).
:::
::::
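
To make these shapes concrete, each data variable can be pulled out as an {class}`xarray.DataArray` and indexed by label (a sketch; the keypoint and individual names are assumed for illustration):

```python
# Sketch: access the data variables of a poses dataset `ds`
position = ds.position      # dims: (time, space, keypoints, individuals)
confidence = ds.confidence  # dims: (time, keypoints, individuals)

# Label-based selection works regardless of dimension order,
# e.g. the x/y trajectory of one keypoint of one individual:
snout_trajectory = position.sel(keypoints="snout", individuals="individual_0")
```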
10 changes: 5 additions & 5 deletions movement/io/load_bboxes.py
@@ -36,12 +36,12 @@ def from_numpy(
Parameters
----------
position_array : np.ndarray
Array of shape (n_frames, n_individuals, n_space)
Array of shape (n_frames, n_space, n_individuals)
containing the tracks of the bounding boxes' centroids.
It will be converted to a :class:`xarray.DataArray` object
named "position".
shape_array : np.ndarray
Array of shape (n_frames, n_individuals, n_space)
Array of shape (n_frames, n_space, n_individuals)
containing the shape of the bounding boxes. The shape of a bounding
box is its width (extent along the x-axis of the image) and height
(extent along the y-axis of the image). It will be converted to a
@@ -56,7 +56,7 @@
If None (default), bounding boxes are assigned names based on the size
of the ``position_array``. The names will be in the format of
``id_<N>``, where <N> is an integer from 0 to
``position_array.shape[1]-1`` (i.e., "id_0", "id_1"...).
``position_array.shape[-1]-1`` (i.e., "id_0", "id_1"...).
frame_array : np.ndarray, optional
Array of shape (n_frames, 1) containing the frame numbers for which
bounding boxes are defined. If None (default), frame numbers will
@@ -376,9 +376,9 @@ def _numpy_arrays_from_via_tracks_file(
The extracted numpy arrays are returned in a dictionary with the following
keys:
- position_array (n_frames, n_individuals, n_space):
- position_array (n_frames, n_space, n_individuals):
contains the trajectories of the bounding boxes' centroids.
- shape_array (n_frames, n_individuals, n_space):
- shape_array (n_frames, n_space, n_individuals):
contains the shape of the bounding boxes (width and height).
- confidence_array (n_frames, n_individuals):
contains the confidence score of each bounding box.
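
For orientation, a minimal sketch of building a bounding boxes dataset from arrays with the shapes described above (array contents and individual names are made up, and the ``confidence_array`` argument is assumed to mirror the poses loader):

```python
import numpy as np
from movement.io import load_bboxes

# Sketch: 100 frames, 2 spatial dimensions (x, y), 3 individuals
ds = load_bboxes.from_numpy(
    position_array=np.random.rand(100, 2, 3),  # centroid x, y per individual
    shape_array=np.random.rand(100, 2, 3),     # width, height per individual
    confidence_array=np.ones((100, 3)),        # one score per bounding box
    individual_names=["id_0", "id_1", "id_2"],
)
```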
34 changes: 18 additions & 16 deletions movement/io/load_poses.py
@@ -31,11 +31,11 @@ def from_numpy(
Parameters
----------
position_array : np.ndarray
Array of shape (n_frames, n_individuals, n_keypoints, n_space)
Array of shape (n_frames, n_space, n_keypoints, n_individuals)
containing the poses. It will be converted to a
:class:`xarray.DataArray` object named "position".
confidence_array : np.ndarray, optional
Array of shape (n_frames, n_individuals, n_keypoints) containing
Array of shape (n_frames, n_keypoints, n_individuals) containing
the point-wise confidence scores. It will be converted to a
:class:`xarray.DataArray` object named "confidence".
If None (default), the scores will be set to an array of NaNs.
@@ -70,7 +70,7 @@
>>> import numpy as np
>>> from movement.io import load_poses
>>> ds = load_poses.from_numpy(
... position_array=np.random.rand((100, 2, 3, 2)),
... position_array=np.random.rand(100, 2, 3, 2),
... confidence_array=np.ones((100, 2, 3)),
... individual_names=["Alice", "Bob"],
... keypoint_names=["snout", "centre", "tail_base"],
@@ -156,7 +156,7 @@ def from_dlc_style_df(
be formatted as in DeepLabCut output files (see Notes).
fps : float, optional
The number of frames per second in the video. If None (default),
the `time` coordinates will be in frame numbers.
the ``time`` coordinates will be in frame numbers.
source_software : str, optional
Name of the pose estimation software from which the data originate.
Defaults to "DeepLabCut", but it can also be "LightningPose"
@@ -225,7 +225,7 @@ def from_sleap_file(
also be supplied (but this feature is experimental, see Notes).
fps : float, optional
The number of frames per second in the video. If None (default),
the `time` coordinates will be in frame numbers.
the ``time`` coordinates will be in frame numbers.
Returns
-------
@@ -299,7 +299,7 @@ def from_lp_file(
Path to the file containing the predicted poses, in .csv format.
fps : float, optional
The number of frames per second in the video. If None (default),
the `time` coordinates will be in frame numbers.
the ``time`` coordinates will be in frame numbers.
Returns
-------
@@ -330,7 +330,7 @@ def from_dlc_file(
or .csv format.
fps : float, optional
The number of frames per second in the video. If None (default),
the `time` coordinates will be in frame numbers.
the ``time`` coordinates will be in frame numbers.
Returns
-------
@@ -368,7 +368,7 @@ def from_multiview_files(
The source software of the file.
fps : float, optional
The number of frames per second in the video. If None (default),
the `time` coordinates will be in frame numbers.
the ``time`` coordinates will be in frame numbers.
Returns
-------
@@ -404,7 +404,7 @@ def _ds_from_lp_or_dlc_file(
The source software of the file.
fps : float, optional
The number of frames per second in the video. If None (default),
the `time` coordinates will be in frame numbers.
the ``time`` coordinates will be in frame numbers.
Returns
-------
@@ -422,10 +422,12 @@
)

# Load the DeepLabCut poses into a DataFrame
if file.path.suffix == ".csv":
df = _df_from_dlc_csv(file.path)
else: # file.path.suffix == ".h5"
df = _df_from_dlc_h5(file.path)

df = (
_df_from_dlc_csv(file.path)
if file.path.suffix == ".csv"
else _df_from_dlc_h5(file.path)
)

logger.debug(f"Loaded poses from {file.path} into a DataFrame.")
# Convert the DataFrame to an xarray dataset
@@ -450,7 +452,7 @@
Path to the SLEAP analysis file containing predicted pose tracks.
fps : float, optional
The number of frames per second in the video. If None (default),
the `time` coordinates will be in frame units.
the ``time`` coordinates will be in frame units.
Returns
-------
@@ -462,7 +464,7 @@
file = ValidHDF5(file_path, expected_datasets=["tracks"])

with h5py.File(file.path, "r") as f:
# transpose to shape: (n_frames, n_space, n_keypoints, n_tracks)
# Transpose to shape: (n_frames, n_space, n_keypoints, n_tracks)
tracks = f["tracks"][:].transpose(3, 1, 2, 0)
# Create an array of NaNs for the confidence scores
scores = np.full(tracks.shape[:1] + tracks.shape[2:], np.nan)
@@ -498,7 +500,7 @@ def _ds_from_sleap_labels_file(
Path to the SLEAP labels file containing predicted pose tracks.
fps : float, optional
The number of frames per second in the video. If None (default),
the `time` coordinates will be in frame units.
the ``time`` coordinates will be in frame units.
Returns
-------
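
Across these loaders, the ``fps`` argument determines whether the ``time`` coordinates are expressed in seconds or in frame numbers; a usage sketch (with a made-up file path):

```python
from movement.io import load_poses

# Sketch: with fps given, time coordinates are in seconds
ds_seconds = load_poses.from_dlc_file("path/to/predictions.h5", fps=30)

# Without fps, time coordinates are frame numbers (0, 1, 2, ...)
ds_frames = load_poses.from_dlc_file("path/to/predictions.h5")
```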
2 changes: 0 additions & 2 deletions movement/io/save_poses.py
@@ -45,15 +45,13 @@ def _ds_to_dlc_style_df(
# Reverse the order of the dimensions except for the time dimension
transpose_order = [0] + list(range(tracks_with_scores.ndim - 1, 0, -1))
tracks_with_scores = tracks_with_scores.transpose(transpose_order)

# Create DataFrame with multi-index columns
df = pd.DataFrame(
data=tracks_with_scores.reshape(ds.sizes["time"], -1),
index=np.arange(ds.sizes["time"], dtype=int),
columns=columns,
dtype=float,
)

return df


18 changes: 5 additions & 13 deletions movement/kinematics.py
@@ -273,38 +273,32 @@ def compute_forward_vector(
"Input data must have exactly 2 spatial dimensions, but "
f"currently has {len(data.space)}.",
)

# Validate input keypoints
if left_keypoint == right_keypoint:
raise log_error(
ValueError, "The left and right keypoints may not be identical."
)

# Define right-to-left vector
right_to_left_vector = data.sel(
keypoints=left_keypoint, drop=True
) - data.sel(keypoints=right_keypoint, drop=True)

# Define upward vector
# default: negative z direction in the image coordinate system
if camera_view == "top_down":
upward_vector = np.array([0, 0, -1])
else:
upward_vector = np.array([0, 0, 1])

upward_vector = (
np.array([0, 0, -1])
if camera_view == "top_down"
else np.array([0, 0, 1])
)
upward_vector = xr.DataArray(
np.tile(upward_vector.reshape(1, -1), [len(data.time), 1]),
dims=["time", "space"],
)

# Compute forward direction as the cross product
# (right-to-left) cross (forward) = up
forward_vector = xr.cross(
right_to_left_vector, upward_vector, dim="space"
)[:, :, :-1] # keep only the first 2 dimensions of the result

# Return unit vector

return forward_vector / compute_norm(forward_vector)
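
A usage sketch for the function above (the ``ds`` dataset and the ear keypoint names are assumed for illustration):

```python
from movement.kinematics import compute_forward_vector

# Sketch: forward vectors from two ear keypoints, viewed from a top-down camera.
# `ds` is assumed to be a 2D poses dataset with these keypoints tracked.
forward = compute_forward_vector(
    ds.position,
    left_keypoint="left_ear",
    right_keypoint="right_ear",
    camera_view="top_down",
)
# `forward` contains unit vectors pointing in each individual's forward direction
```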


@@ -780,7 +774,6 @@ def compute_path_length(
time=slice(1, None)
) # skip first displacement (always 0)
).sum(dim="time", min_count=1) # return NaN if no valid segment

elif nan_policy == "scale":
return _compute_scaled_path_length(data)
else:
@@ -815,7 +808,6 @@ def _warn_about_nan_proportion(
ValueError,
"nan_warn_threshold must be between 0 and 1.",
)

n_nans = data.isnull().any(dim="space").sum(dim="time")
data_to_warn_about = data.where(
n_nans > data.sizes["time"] * nan_warn_threshold, drop=True