talmolab · shaikh58 · Jul 31, 2024 · Jul 31, 2024 · Aug 2, 2024 · Aug 5, 2024
diff --git a/.gitignore b/.gitignore
@@ -142,3 +142,8 @@ dreem/training/models/*
 
 # docs
 site/
+*.xml
+dreem/training/configs/base.yaml
+dreem/training/configs/override.yaml
+dreem/training/configs/override.yaml
+dreem/training/configs/base.yaml
diff --git a/dreem/datasets/sleap_dataset.py b/dreem/datasets/sleap_dataset.py
@@ -415,4 +415,4 @@ def get_instances(self, label_idx: list[int], frame_idx: list[int]) -> list[Fram
     def __del__(self):
         """Handle file closing before garbage collection."""
         for reader in self.videos:
-            reader.close()
+            reader.close()
diff --git a/dreem/inference/eval.py b/dreem/inference/eval.py
@@ -77,4 +77,4 @@ def run(cfg: DictConfig) -> dict[int, sio.Labels]:
 
     # override with params config, and specific params:
     # python eval.py --config-dir=./configs --config-name=inference +params_config=configs/params.yaml dataset.train_dataset.padding=10
-    run()
+    run()
diff --git a/dreem/inference/post_processing.py b/dreem/inference/post_processing.py
@@ -126,6 +126,8 @@ def filter_max_center_dist(
     k_boxes: torch.Tensor | None = None,
     nonk_boxes: torch.Tensor | None = None,
     id_inds: torch.Tensor | None = None,
+    h: int = None,
+    w: int = None
 ) -> torch.Tensor:
     """Filter trajectory score by distances between objects across frames.
 
@@ -135,6 +137,8 @@ def filter_max_center_dist(
         k_boxes: The bounding boxes in the current frame
         nonk_boxes: the boxes not in the current frame
         id_inds: track ids
+        h: height of image
+        w: width of image
 
     Returns:
         An N_t x N association matrix
@@ -147,13 +151,15 @@ def filter_max_center_dist(
         k_s = ((k_boxes[:, :, 2:] - k_boxes[:, :, :2]) ** 2).sum(dim=2)  # n_k
 
         nonk_ct = (nonk_boxes[:, :, :2] + nonk_boxes[:, :, 2:]) / 2
-
+        # TODO: nonk_boxes should only come from the previous frame rather than the entire window
         dist = ((k_ct[:, None, :, :] - nonk_ct[None, :, :, :]) ** 2).sum(
             dim=-1
         )  # n_k x Np
-
-        norm_dist = dist / (k_s[:, None, :] + 1e-8)
+        # TODO: note that dist is expressed as a fraction of the image height and width;
+        # TODO: it needs to be scaled by the original image size so that it is in units of pixels
+        # norm_dist = dist / (k_s[:, None, :] + 1e-8)
         norm_dist = dist.mean(axis=-1)  # n_k x Np
+        # norm_dist = 
 
         valid = norm_dist < max_center_dist  # n_k x Np
         valid_assn = (

diff --git a/dreem/inference/track.py b/dreem/inference/track.py
@@ -163,4 +163,4 @@ def run(cfg: DictConfig) -> dict[int, sio.Labels]:
 
     # override with params config, and specific params:
     # python train.py --config-dir=./configs --config-name=inference +params_config=configs/params.yaml dataset.train_dataset.padding=10
-    run()
+    run()
diff --git a/dreem/inference/tracker.py b/dreem/inference/tracker.py
@@ -138,8 +138,10 @@ def track(
         # asso_preds, pred_boxes, pred_time, embeddings = self.model(
         #     instances, reid_features
         # )
+        # sliding_inference gets reference and query instances from the TrackQueue and calls _run_global_tracker()
         instances_pred = self.sliding_inference(model, frames)
 
+        # e.g. during train/val, don't track across batches so persistent_tracking is switched off
         if not self.persistent_tracking:
             logger.debug(f"Clearing Queue after tracking")
             self.track_queue.end_tracks()
@@ -164,7 +166,9 @@ def sliding_inference(
         # H: height.
         # W: width.
 
+        # frames is the untracked clip to run inference on
         for batch_idx, frame_to_track in enumerate(frames):
+            # tracked_frames is a list of reference frames that have been tracked (associated)
             tracked_frames = self.track_queue.collate_tracks(
                 device=frame_to_track.frame_id.device
             )
@@ -188,10 +192,11 @@ def sliding_inference(
                     )
 
                     curr_track_id = 0
+                    # if track ids exist from another tracking program i.e. sleap, init with those
                     for i, instance in enumerate(frames[batch_idx].instances):
                         instance.pred_track_id = instance.gt_track_id
                         curr_track_id = max(curr_track_id, instance.pred_track_id)
-
+                    # if no track ids, then assign new ones
                     for i, instance in enumerate(frames[batch_idx].instances):
                         if instance.pred_track_id == -1:
                             curr_track_id += 1
@@ -201,6 +206,7 @@ def sliding_inference(
                 if (
                     frame_to_track.has_instances()
                 ):  # Check if there are detections. If there are skip and increment gap count
+                    # combine the tracked frames with the latest frame; inference pipeline uses latest frame as pred
                     frames_to_track = tracked_frames + [
                         frame_to_track
                     ]  # better var name?
@@ -217,7 +223,7 @@ def sliding_inference(
                 self.track_queue.add_frame(frame_to_track)
             else:
                 self.track_queue.increment_gaps([])
-
+            # update the frame object from the input inference untracked clip
             frames[batch_idx] = frame_to_track
         return frames
 
@@ -252,7 +258,7 @@ def _run_global_tracker(
         # E.g.: instances_per_frame: [4, 5, 6, 7]; window of length 4 with 4 detected instances in the first frame of the window.
 
         _ = model.eval()
-
+        # get the last frame in the clip to perform inference on
         query_frame = frames[query_ind]
 
         query_instances = query_frame.instances
@@ -279,8 +285,10 @@ def _run_global_tracker(
 
         # (L=1, n_query, total_instances)
         with torch.no_grad():
+            # GTR knows this is for inference since query_instances is not None
             asso_matrix = model(all_instances, query_instances)
 
+        # GTR output is n_query x n_instances - split this into per-frame to softmax each frame separately
         asso_output = asso_matrix[-1].matrix.split(
             instances_per_frame, dim=1
         )  # (window_size, n_query, N_i)
@@ -296,7 +304,7 @@ def _run_global_tracker(
 
         asso_output_df.index.name = "Instances"
         asso_output_df.columns.name = "Instances"
-
+        # save the association matrix to the Frame object
         query_frame.add_traj_score("asso_output", asso_output_df)
         query_frame.asso_output = asso_matrix[-1]
 
@@ -343,6 +351,8 @@ def _run_global_tracker(
 
         query_frame.add_traj_score("asso_nonquery", asso_nonquery_df)
 
+        # need frame height and width to scale boxes during post-processing
+        _, h, w = query_frame.img_shape.flatten() 
         pred_boxes = model_utils.get_boxes(all_instances)
         query_boxes = pred_boxes[query_inds]  # n_k x 4
         nonquery_boxes = pred_boxes[nonquery_inds]  # n_nonquery x 4
@@ -374,7 +384,7 @@ def _run_global_tracker(
 
             query_frame.add_traj_score("decay_time", decay_time_traj_score)
         ################################################################################
-
+        # reduce association matrix - aggregating reference instance association scores by tracks
         # (n_query x n_nonquery) x (n_nonquery x n_traj) --> n_query x n_traj
         traj_score = torch.mm(traj_score, id_inds.cpu())  # (n_query, n_traj)
 
@@ -387,6 +397,7 @@ def _run_global_tracker(
 
         query_frame.add_traj_score("traj_score", traj_score_df)
         ################################################################################
+        # IOU-based post-processing; add a weighted IOU across successive frames to association scores
 
         # with iou -> combining with location in tracker, they set to True
         # todo -> should also work without pos_embed
@@ -421,11 +432,12 @@ def _run_global_tracker(
 
             query_frame.add_traj_score("weight_iou", iou_traj_score)
         ################################################################################
+        # filters association matrix such that instances too far from each other get scores=0
 
         # threshold for continuing a tracking or starting a new track -> they use 1.0
         # todo -> should also work without pos_embed
         traj_score = post_processing.filter_max_center_dist(
-            traj_score, self.max_center_dist, query_boxes, nonquery_boxes, id_inds
+            traj_score, self.max_center_dist, query_boxes, nonquery_boxes, id_inds, h, w
         )
 
         if self.max_center_dist is not None and self.max_center_dist > 0:
@@ -439,6 +451,7 @@ def _run_global_tracker(
             query_frame.add_traj_score("max_center_dist", max_center_dist_traj_score)
 
         ################################################################################
+        # softmax along tracks for each instance, for interpretability
         scaled_traj_score = torch.softmax(traj_score, dim=1)
         scaled_traj_score_df = pd.DataFrame(
             scaled_traj_score.numpy(), columns=unique_ids.cpu().numpy()
@@ -449,6 +462,7 @@ def _run_global_tracker(
         query_frame.add_traj_score("scaled", scaled_traj_score_df)
         ################################################################################
 
+        # hungarian matching
         match_i, match_j = linear_sum_assignment((-traj_score))
 
         track_ids = instance_ids.new_full((n_query,), -1)
@@ -462,6 +476,7 @@ def _run_global_tracker(
             thresh = (
                 overlap_thresh * id_inds[:, j].sum() if mult_thresh else overlap_thresh
             )
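+            # keep the matched track if its association score exceeds the threshold (or n_traj has reached max_tracks);
+            # a query instance whose score is lower than the threshold gets a new track created for it instead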
             if n_traj >= self.max_tracks or traj_score[i, j] > thresh:
                 logger.debug(
                     f"Assigning instance {i} to track {j} with id {unique_ids[j]}"

diff --git a/dreem/io/config.py b/dreem/io/config.py
@@ -226,7 +226,7 @@ def get_dataset(
         mode: str,
         label_files: list[str] | None = None,
         vid_files: list[str | list[str]] = None,
-    ) -> "SleapDataset" | "MicroscopyDataset" | "CellTrackingDataset":
+    ) -> "SleapDataset" | "MicroscopyDataset" | "CellTrackingDataset" | None:
         """Getter for datasets.
 
         Args:
@@ -301,6 +301,10 @@ def get_dataset(
                 "Could not resolve dataset type from Config! Please include \
                 either `slp_files` or `tracks`/`source`"
             )
+        if len(dataset) == 0:
+            logger.warn(f"Length of {mode} dataset is {len(dataset)}! Returning None")
+            return None
+        return dataset
-        if len(dataset) == 0:
-            logger.warn(f"Length of {mode} dataset is {len(dataset)}! Returning None")
-            return None
-        return dataset
+        if len(dataset) == 0:
+            logger.warning(f"Length of {mode} dataset is {len(dataset)}! Returning None")
+            return None
+        return dataset
-        if len(dataset) == 0:
-            logger.warn(f"Length of {mode} dataset is {len(dataset)}! Returning None")
-            return None
-        return dataset
+        if len(dataset) == 0:
+            logger.warning(f"Length of {mode} dataset is {len(dataset)}! Returning None")
+            return None
+        return dataset
 
     @property
     def data_paths(self):
@@ -319,9 +323,9 @@ def data_paths(self, paths: tuple[str, list[str]]):
 
     def get_dataloader(
         self,
-        dataset: "SleapDataset" | "MicroscopyDataset" | "CellTrackingDataset",
+        dataset: "SleapDataset" | "MicroscopyDataset" | "CellTrackingDataset" | None,
         mode: str,
-    ) -> torch.utils.data.DataLoader:
+    ) -> torch.utils.data.DataLoader | None:
         """Getter for dataloader.
 
         Args:
@@ -350,14 +354,21 @@ def get_dataloader(
         else:
             pin_memory = False
 
-        return torch.utils.data.DataLoader(
+        dataloader = torch.utils.data.DataLoader(
-        dataloader = torch.utils.data.DataLoader(
+        dataloader = torch.utils.data.DataLoader(
+            dataset=dataset,
+            pin_memory=pin_memory,
+            collate_fn=dataset.no_batching_fn,
+            **dataloader_params,
+        )
-        dataloader = torch.utils.data.DataLoader(
+        dataloader = torch.utils.data.DataLoader(
+            dataset=dataset,
+            pin_memory=pin_memory,
+            collate_fn=dataset.no_batching_fn,
+            **dataloader_params,
+        )
             dataset=dataset,
             batch_size=1,
             pin_memory=pin_memory,
             collate_fn=dataset.no_batching_fn,
             **dataloader_params,
         )
 
+        if len(dataloader) == 0:
+            logger.warn(
+                f"Length of {mode} dataloader is {len(dataloader)}! Returning `None`"
+            )
-            logger.warn(
-                f"Length of {mode} dataloader is {len(dataloader)}! Returning `None`"
-            )
+            logger.warning(
+                f"Length of {mode} dataloader is {len(dataloader)}! Returning `None`"
+            )
-            logger.warn(
-                f"Length of {mode} dataloader is {len(dataloader)}! Returning `None`"
-            )
+            logger.warning(
+                f"Length of {mode} dataloader is {len(dataloader)}! Returning `None`"
+            )
+            return None
+        return dataloader
+
     def get_optimizer(self, params: Iterable) -> torch.optim.Optimizer:
         """Getter for optimizer.
 
@@ -492,7 +503,7 @@ def get_checkpointing(self) -> pl.callbacks.ModelCheckpoint:
                 filename=f"{{epoch}}-{{{metric}}}",
                 **checkpoint_params,
             )
-            checkpointer.CHECKPOINT_NAME_LAST = f"{{epoch}}-best-{{{metric}}}"
+            checkpointer.CHECKPOINT_NAME_LAST = f"{{epoch}}-final-{{{metric}}}"
             checkpointers.append(checkpointer)
         return checkpointers
 

diff --git a/dreem/io/instance.py b/dreem/io/instance.py
@@ -565,7 +565,11 @@ def add_embedding(self, emb_type: str, embedding: torch.Tensor) -> None:
             emb_type: Key/embedding type to be saved to dictionary
             embedding: The actual torch tensor embedding.
         """
-        embedding = _expand_to_rank(embedding, 2)
+        if (
+            type(embedding) != dict
+        ):  # for embedding agg method "average", input is array
+            # for method stack and concatenate, input is dict
+            embedding = _expand_to_rank(embedding, 2)
         self._embeddings[emb_type] = embedding
 
     @property

diff --git a/dreem/models/attention_head.py b/dreem/models/attention_head.py
@@ -9,23 +9,40 @@
 class ATTWeightHead(torch.nn.Module):
     """Single attention head."""
 
-    def __init__(
-        self,
-        feature_dim: int,
-        num_layers: int,
-        dropout: float,
-    ):
+    def __init__(self, feature_dim: int, num_layers: int, dropout: float, **kwargs):
         """Initialize an instance of ATTWeightHead.
 
         Args:
             feature_dim: The dimensionality of input features.
             num_layers: The number of hidden layers in the MLP.
             dropout: Dropout probability.
+            embedding_agg_method: optional keyword argument (read from kwargs, defaults to None);
+                how the embeddings are aggregated; average/stack/concatenate
         """
         super().__init__()
+        if "embedding_agg_method" in kwargs:
+            self.embedding_agg_method = kwargs["embedding_agg_method"]
+        else:
+            self.embedding_agg_method = None
-        if "embedding_agg_method" in kwargs:
-            self.embedding_agg_method = kwargs["embedding_agg_method"]
-        else:
-            self.embedding_agg_method = None
+        self.embedding_agg_method = kwargs.get("embedding_agg_method", None)
-        if "embedding_agg_method" in kwargs:
-            self.embedding_agg_method = kwargs["embedding_agg_method"]
-        else:
-            self.embedding_agg_method = None
+        self.embedding_agg_method = kwargs.get("embedding_agg_method", None)
 
-        self.q_proj = MLP(feature_dim, feature_dim, feature_dim, num_layers, dropout)
-        self.k_proj = MLP(feature_dim, feature_dim, feature_dim, num_layers, dropout)
+        # if using stacked embeddings, use 1x1 conv with x,y,t embeddings as channels
+        # ensures output represents ref instances by query instances
+        if self.embedding_agg_method == "stack":
+            self.q_proj = torch.nn.Conv1d(
+                in_channels=3, out_channels=1, kernel_size=1, stride=1, padding=0
+            )
+            self.k_proj = torch.nn.Conv1d(
+                in_channels=3, out_channels=1, kernel_size=1, stride=1, padding=0
+            )
+            self.attn_x = torch.nn.MultiheadAttention(feature_dim, 1)
+            self.attn_y = torch.nn.MultiheadAttention(feature_dim, 1)
+            self.attn_t = torch.nn.MultiheadAttention(feature_dim, 1)
+        else:
+            self.q_proj = MLP(
+                feature_dim, feature_dim, feature_dim, num_layers, dropout
+            )
+            self.k_proj = MLP(
+                feature_dim, feature_dim, feature_dim, num_layers, dropout
+            )
 
     def forward(
         self,
@@ -41,8 +58,45 @@ def forward(
         Returns:
             Output tensor of shape (batch_size, num_frame_instances, num_window_instances).
         """
-        k = self.k_proj(key)
-        q = self.q_proj(query)
-        attn_weights = torch.bmm(q, k.transpose(1, 2))
+        batch_size, num_query_instances, feature_dim = query.size()
+        num_window_instances = key.shape[1]
+
+        # if stacked embeddings, create channels for each x,y,t embedding dimension
+        # maps shape (1,num_instances*3,feature_dim) -> (num_instances,3,feature_dim)
+        if self.embedding_agg_method == "stack":
+            key_stacked = (
+                key
+                .view(batch_size, 3, num_window_instances // 3, feature_dim)
+                .permute(0, 2, 1, 3)
+                .squeeze(0) # now (num_instances, 3, feature_dim); assumes batch_size == 1
+            )
+            key_orig = key.squeeze(0) # keep as (num_instances*3, feature_dim)
+
+            query = (
+                query.view(batch_size, 3, num_query_instances // 3, feature_dim)
+                .permute(0, 2, 1, 3)
+                .squeeze(0)
+            )
+            # pass t,x,y frame features through cross attention with entire encoder 3*num_window_instances tokens before MLP;
+            # note order is t,x,y
+            out_t, _ = self.attn_t(query=query[:,0,:], key=key_orig, value=key_orig)
+            out_x, _ = self.attn_x(query=query[:,1,:], key=key_orig, value=key_orig)
+            out_y, _ = self.attn_y(query=query[:,2,:], key=key_orig, value=key_orig)
+            # combine each attention output to (num_instances, 3, feature_dim)
+            collated = torch.stack((out_t, out_x, out_y), dim=0).permute(1,0,2)
+            # q_proj is a 1x1 conv here that collapses the 3 (t,x,y) channels into one;
+            # mlp_out has shape (1, num_query_instances // 3, feature_dim)
+            mlp_out = self.q_proj(collated).transpose(1,0)
+
+            # key, query of shape (num_instances, 3, feature_dim)
+            # TODO: uncomment this if not using modified attention heads for t,x,y
+            k = self.k_proj(key_stacked).transpose(1, 0)
+            # q = self.q_proj(query).transpose(1, 0)
+            # k,q of shape (num_instances, feature_dim)
+            attn_weights = torch.bmm(mlp_out, k.transpose(1, 2))
+        else:
+            k = self.k_proj(key)
+            q = self.q_proj(query)
+            attn_weights = torch.bmm(q, k.transpose(1, 2))
+
 
         return attn_weights  # (B, N_t, N)
-Original file line number
+Diff line change
@@ Expand Up / @@ -77,4 +77,4 @@ def run(cfg: DictConfig) -> dict[int, sio.Labels]: @@
         # override with params config, and specific params:
         # python eval.py --config-dir=./configs --config-name=inference +params_config=configs/params.yaml dataset.train_dataset.padding=10
-        run()
+        run()