From a1e13018dbe6b18b0b91f738ebe88ab1417f2def Mon Sep 17 00:00:00 2001 From: danellecline Date: Thu, 21 Nov 2024 14:14:39 -0800 Subject: [PATCH] refactor: rename vits prediction option to --use-vits --- README.md | 19 ++++++++++--------- sdcat/cluster/cluster.py | 8 ++++---- sdcat/cluster/commands.py | 12 ++++++------ 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index f871595..cd9c2b2 100644 --- a/README.md +++ b/README.md @@ -68,11 +68,12 @@ The algorithm workflow looks like this: | google/vit-base-patch16-224(default) | 16 block size trained on ImageNet21k with 21k classes | | facebook/dino-vits8 | trained on ImageNet which contains 1.3 M images with labels from 1000 classes | | facebook/dino-vits16 | trained on ImageNet which contains 1.3 M images with labels from 1000 classes | +| MBARI-org/mbari-uav-vit-b-16 | MBARI UAV ViT-B/16 model trained on 10425 UAV images with labels from 21 classes | Smaller block_size means more patches and more accurate fine-grained clustering on smaller objects, so ViTS models with 8 block size are recommended for fine-grained clustering on small objects, and 16 is recommended for coarser clustering on larger objects. We recommend running with multiple models to see which model works best for your data, -and to experiment with the --min_samples and --min-cluster-size options to get good clustering results. +and to experiment with the --min-samples and --min-cluster-size options to get good clustering results. # Installation @@ -145,7 +146,7 @@ Commands: ## File organization -The sdcat toolkit generates data in the following folders. Here, we assume both detection and clustering is output to the same root folder.: +The sdcat toolkit generates data in the following folders. Here, we assume both detection and clustering are stored in the same root folder: ``` /data/20230504-MBARI/ @@ -173,23 +174,23 @@ The sdcat toolkit generates data in the following folders.
Here, we assume both ``` -## Process images creating bounding box detections with the YOLOv5 model. -The YOLOv5s model is not as accurate as other models, but is fast and good for detecting larger objects in images, +## Process images creating bounding box detections with the YOLOv8s model. +The YOLOv8s model is not as accurate as other models, but is fast and good for detecting larger objects in images, and good for experiments and quick results. **Slice size** is the size of the detection window. The default is to allow the SAHI algorithm to determine the slice size; a smaller slice size will take longer to process. ```shell -sdcat detect --image-dir --save-dir --model yolov5s --slice-size-width 900 --slice-size-height 900 +sdcat detect --image-dir --save-dir --model yolov8s --slice-size-width 900 --slice-size-height 900 ``` -## Cluster detections from the YOLOv5 model +## Cluster detections from the YOLOv8s model, but use the classifications from the ViT model. -Cluster the detections from the YOLOv5 model. The detections are clustered using cosine similarity and embedding -features from a FaceBook Vision Transformer (ViT) model. +Cluster the detections from the YOLOv8s model. 
The detections are clustered using cosine similarity and embedding +features from the default Vision Transformer (ViT) model `google/vit-base-patch16-224`. ```shell -sdcat cluster --det-dir --save-dir --model yolov5s +sdcat cluster --det-dir /yolov8s/det_filtered --save-dir --use-vits ``` diff --git a/sdcat/cluster/cluster.py b/sdcat/cluster/cluster.py index ba900f2..351c0fa 100755 --- a/sdcat/cluster/cluster.py +++ b/sdcat/cluster/cluster.py @@ -323,7 +323,7 @@ def cluster_vits( min_cluster_size: int, min_samples: int, device: str = "cpu", - use_predictions: bool = False, + use_vits: bool = False, use_tsne: bool = False, skip_visualization: bool = False, roi: bool = False) -> pd.DataFrame: @@ -340,7 +340,7 @@ def cluster_vits( :param min_cluster_size: The minimum number of samples in a cluster :param min_samples:The number of samples in a neighborhood for a point :param device: The device to use for clustering, 'cpu' or 'cuda' - :param use_predictions: Whether to use the predictions from the model used for clustering to assign classes + :param use_vits: Set to use the predictions from the ViT model to assign classes :param skip_visualization: Whether to skip the visualization of the clusters :param use_tsne: Whether to use t-SNE for dimensionality reduction :return: a dataframe with the assigned cluster indexes, or -1 for non-assigned.""" @@ -452,8 +452,8 @@ def cluster_vits( debug(f'Adding {images[idx]} to cluster id {cluster_id} ') df_dets.loc[df_dets['crop_path'] == images[idx], 'cluster'] = cluster_id - # If use_predictions is true, then assign the class to each detection - if use_predictions: + # If use_vits is true, then assign the class to each detection + if use_vits: for idx, row in df_dets.iterrows(): predictions, scores = image_predictions[idx], image_scores[idx] df_dets.loc[idx, 'class'] = predictions[0] # Use the top prediction diff --git a/sdcat/cluster/commands.py b/sdcat/cluster/commands.py index 77de5a5..6c3b03d 100644 --- a/sdcat/cluster/commands.py +++ 
b/sdcat/cluster/commands.py @@ -35,8 +35,8 @@ @click.option('--det-dir', help='Input folder(s) with raw detection results', multiple=True, required=True) @click.option('--save-dir', help='Output directory to save clustered detection results', required=True) @click.option('--device', help='Device to use, e.g. cpu or cuda:0', type=str, default='cpu') -@click.option('--use-predictions', help='Set to using the cluster model for prediction', is_flag=True) -def run_cluster_det(det_dir, save_dir, device, use_predictions, config_ini, alpha, cluster_selection_epsilon, cluster_selection_method, min_cluster_size, start_image, end_image, use_tsne, skip_visualization): +@click.option('--use-vits', help='Set to use the predictions from the ViT model to assign classes', is_flag=True) +def run_cluster_det(det_dir, save_dir, device, use_vits, config_ini, alpha, cluster_selection_epsilon, cluster_selection_method, min_cluster_size, start_image, end_image, use_tsne, skip_visualization): config = cfg.Config(config_ini) max_area = int(config('cluster', 'max_area')) min_area = int(config('cluster', 'min_area')) @@ -259,7 +259,7 @@ def is_day(utc_dt): # Cluster the detections df_cluster = cluster_vits(prefix, model, df, save_dir, alpha, cluster_selection_epsilon, cluster_selection_method, min_similarity, min_cluster_size, min_samples, device, use_tsne=use_tsne, - skip_visualization=skip_visualization, roi=False, use_predictions=use_predictions) + skip_visualization=skip_visualization, roi=False, use_vits=use_vits) # Merge the results with the original DataFrame df.update(df_cluster) @@ -281,8 +281,8 @@ def is_day(utc_dt): @click.option('--roi-dir', help='Input folder(s) with raw ROI images', multiple=True, required=True) @click.option('--save-dir', help='Output directory to save clustered detection results', required=True) @click.option('--device', help='Device to use, e.g. 
cpu or cuda:0', type=str) -@click.option('--use-predictions', help='Set to using the cluster model for prediction', is_flag=True) -def run_cluster_roi(roi_dir, save_dir, device, use_predictions, config_ini, alpha, cluster_selection_epsilon, cluster_selection_method, min_cluster_size, use_tsne, skip_visualization): +@click.option('--use-vits', help='Set to use the predictions from the ViT model to assign classes', is_flag=True) +def run_cluster_roi(roi_dir, save_dir, device, use_vits, config_ini, alpha, cluster_selection_epsilon, cluster_selection_method, min_cluster_size, use_tsne, skip_visualization): config = cfg.Config(config_ini) min_samples = int(config('cluster', 'min_samples')) alpha = alpha if alpha else float(config('cluster', 'alpha')) @@ -363,7 +363,7 @@ def run_cluster_roi(roi_dir, save_dir, device, use_predictions, config_ini, alph # Cluster the detections df_cluster = cluster_vits(prefix, model, df, save_dir, alpha, cluster_selection_epsilon, cluster_selection_method, min_similarity, min_cluster_size, min_samples, device, use_tsne, - skip_visualization=skip_visualization, use_vits=use_vits, roi=True) # Merge the results with the original DataFrame df.update(df_cluster)