Working on depth refinement with plane fitting. Also added camera stream recording.

TychoBomer · Jan 23, 2025 · da553d5 · da553d5
1 parent 9c01b16
commit da553d5
Show file tree

Hide file tree

Showing 25 changed files with 74 additions and 214 deletions.
diff --git a/cat_img.png b/cat_img.png
diff --git a/configurations/sam2_zed_small.yaml b/configurations/sam2_zed_small.yaml
@@ -6,12 +6,12 @@ grounding_dino:
   text_threshold: 0.25
   nms_threshold: 0.6
   # config_path: "/home/nakama/Documents/TychoMSC/models/sam2_track_test/segment-anything-2-real-time/GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py"
-  config_path: "/home/nakamalab/Documents/zed_real_time_ROI_depth/GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py"
+  config_path: "./GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py"
   # checkpoint_path: "/home/nakama/Documents/TychoMSC/models/sam2_track_test/segment-anything-2-real-time/GroundingDINO/weights/groundingdino_swint_ogc.pth"
-  checkpoint_path: "/home/nakamalab/Documents/zed_real_time_ROI_depth/GroundingDINO/weights/groundingdino_swint_ogc.pth"
+  checkpoint_path: "./GroundingDINO/weights/groundingdino_swint_ogc.pth"
 
 sam2:
-  checkpoint: "/home/nakamalab/Documents/zed_real_time_ROI_depth/checkpoints/sam2_hiera_small.pt"
+  checkpoint: "./checkpoints/sam2_hiera_small.pt"
   model_cfg: "sam2_configs/sam2_hiera_s.yaml"
 
 camera:

diff --git a/main.py b/main.py
diff --git a/main_2.py b/main_2.py
diff --git a/output/left_img_og.png b/output/left_img_og.png
diff --git a/output/norm_depth.png b/output/norm_depth.png
diff --git a/output/norm_depth_heatmap.jpg b/output/norm_depth_heatmap.jpg
diff --git a/output/output.mp4 b/output/output.mp4
diff --git a/output/output_depth.mp4 b/output/output_depth.mp4
diff --git a/output/refined_depth.png b/output/refined_depth.png
diff --git a/output/test.png b/output/test.png
diff --git a/output_img/left_img_og.png b/output_img/left_img_og.png
diff --git a/output_img/norm_depth.png b/output_img/norm_depth.png
diff --git a/output_img/norm_depth_heatmap.jpg b/output_img/norm_depth_heatmap.jpg
diff --git a/output_img/refined_depth.png b/output_img/refined_depth.png
diff --git a/output_img/test.png b/output_img/test.png
diff --git a/pipeline.log b/pipeline.log
diff --git a/scripts/output/left_img_og.png b/scripts/output/left_img_og.png
diff --git a/scripts/output/norm_depth.png b/scripts/output/norm_depth.png
diff --git a/scripts/output/norm_depth_heatmap.jpg b/scripts/output/norm_depth_heatmap.jpg
diff --git a/scripts/output/refined_depth.png b/scripts/output/refined_depth.png
diff --git a/scripts/output/test.png b/scripts/output/test.png
diff --git a/scripts/sam2_track_zed.py b/scripts/sam2_track_zed.py
@@ -91,6 +91,15 @@ def run(cfg, sam2_prompt: Sam2PromptType) -> None:
     ann_frame_idx = 0
     ann_obj_id = 1
 
+
+
+    # Define the codec and create VideoWriter object
+    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+    out = cv2.VideoWriter('/home/nakama/Documents/TychoMSC/models/sam2_track_test/segment-anything-2-real-time/output/output.mp4', fourcc, 20, (1280, 720), True)
+    out2 = cv2.VideoWriter('/home/nakama/Documents/TychoMSC/models/sam2_track_test/segment-anything-2-real-time/output/output_depth.mp4', fourcc, 20, (1280, 720), False)
+
+    framecount = 0
+
     try:
         while True:
             ts = time.time()
@@ -99,27 +108,29 @@ def run(cfg, sam2_prompt: Sam2PromptType) -> None:
                 # Extract from ZED camera
                 left_image = wrapper.output_image
                 depth_map = wrapper.output_measure
+
+
                 norm_depth_map = cv2.normalize(depth_map, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
 
-                depth_resized = resize_to_multiple(depth_map, 14)
-                depth_tensor = torch.tensor(depth_resized, dtype=torch.float32).unsqueeze(0).unsqueeze(0).to('cuda')
+                # depth_resized = resize_to_multiple(depth_map, 14)
+                # depth_tensor = torch.tensor(depth_resized, dtype=torch.float32).unsqueeze(0).unsqueeze(0).to('cuda')
 
-                prompt_depth = to_numpy_func(depth_tensor)
-                prompt_depth_vis,depth_min, depth_max= visualize_depth(prompt_depth, ret_minmax=True)
-                prompt_depth_vis,depth_min, depth_max= visualize_depth(prompt_depth, ret_minmax=True, depth_min=depth_min, depth_max=depth_max)
+                # prompt_depth = to_numpy_func(depth_tensor)
+                # prompt_depth_vis,depth_min, depth_max= visualize_depth(prompt_depth, ret_minmax=True)
+                # prompt_depth_vis,depth_min, depth_max= visualize_depth(prompt_depth, ret_minmax=True, depth_min=depth_min, depth_max=depth_max)
 
-                imageio.imwrite(os.path.join(output_dir, 'norm_depth_heatmap.jpg'), prompt_depth_vis)
+                # imageio.imwrite(os.path.join(output_dir, 'norm_depth_heatmap.jpg'), prompt_depth_vis)
 
 
                 cv2.imwrite(os.path.join(output_dir, 'norm_depth.png'), norm_depth_map)
 
-                heatmap_depth_map = visualize_depth(norm_depth_map, cmap='Spectral')
-                cv2.imwrite(os.path.join(output_dir, 'norm_depth_heatmap.jpg'), heatmap_depth_map)
+                # heatmap_depth_map = visualize_depth(norm_depth_map, cmap='Spectral')
+                # cv2.imwrite(os.path.join(output_dir, 'norm_depth_heatmap.jpg'), heatmap_depth_map)
 
 
                 left_image_rgb = cv2.cvtColor(left_image, cv2.COLOR_RGBA2RGB)
                 height, width, _ = left_image_rgb.shape
-                cv2.imwrite(os.path.join(output_dir, 'left_img_og.png'), left_image_rgb)
+                # cv2.imwrite(os.path.join(output_dir, 'left_img_og.png'), left_image_rgb)
 
                 # Check if there is a new caption in the queue
                 if caption_queue and not caption_queue.empty():
@@ -157,7 +168,7 @@ def run(cfg, sam2_prompt: Sam2PromptType) -> None:
 
                             for box in detections.xyxy:
                                 x1, y1, x2, y2 = map(int, box)
-                                cv2.rectangle(left_image_rgb, (x1, y1), (x2, y2), (0, 255, 0), 2)
+                                # cv2.rectangle(left_image_rgb, (x1, y1), (x2, y2), (0, 255, 0), 2)
 
                                 input_boxes = np.array([[x1, y1], [x2, y2]], dtype=np.float32)
                                 _, out_obj_ids, out_mask_logits = sam2_predictor.add_new_prompt(
@@ -224,28 +235,55 @@ def run(cfg, sam2_prompt: Sam2PromptType) -> None:
 
 
                 left_image_rgb = cv2.addWeighted(left_image_rgb, 1, all_mask, 0.5, 0)
+                if framecount < 500:
+                    # left_image_rgb = np.array(left_image_rgb)
+                    # norm_depth_map = np.array(norm_depth_map)
+                    # matr = np.ones((1440,1280,4), dtype='uint8')
+                    # # stack = np.vstack((left_image, np.expand_dims(norm_depth_map,axis=2)))
+                    # matr[0:720,:,0:3] = left_image_rgb
+                    # matr[720::,:,0:3]  = norm_depth_map[:,:,np.newaxis]
+                    # cv2.imwrite('cat_img.png',matr)
+                    # out.write(matr)
+
+                    out.write(left_image_rgb)
+                    out2.write(norm_depth_map)
+                    framecount+=1
+                else:
+                    out.write(left_image_rgb)
+                    out.release()
+                    out2.write(norm_depth_map)
+                    out2.release()
+                    cv2.destroyAllWindows()
+                    break
 
                 # *Refine the depth mask using masks
                 if cfg.depth.refine_depth:
+                    guidance_img = np.sum([mask for mask in obj_masks.values()], axis=0).astype(np.uint8)
                     # refined_depth_map = mask_guided_filter(depth_map, left_image_rgb, obj_masks)
                     # refined_depth_map = refine_depth_with_postprocessing(depth_map, left_image_rgb, obj_masks)     
                     # refined_depth_map = refine_depth_with_wjbf_and_sdcf(depth_map, obj_masks, guidance_img, sigma_spatial=15, sigma_range=30)      
-                    # refined_depth_map = refine_depth_with_plane_fitting(depth_map, obj_masks)
+
+
+                    # NOTE: plane fitting is the best approach so far
+                    refined_depth_map = refine_depth_with_plane_fitting(depth_map, obj_masks)
+
+
                     # refined_depth_map = refine_depth_with_mhmf(depth_map, obj_masks, depth_threshold=(1, 255))            
-                    guidance_img = np.sum([mask for mask in obj_masks.values()], axis=0).astype(np.uint8)
-                    refined_depth_map = refine_depth_with_hole_filling(
-                    current_depth=norm_depth_map,
-                    previous_depth=previous_depth,
-                    obj_masks=obj_masks,
-                    guidance_img=guidance_img,
-                    max_distance=5,
-                    alpha=0.5,
-                    kernel_size=5
-                    )
 
 
+
+                    # refined_depth_map = refine_depth_with_hole_filling(
+                    # current_depth=norm_depth_map,
+                    # previous_depth=previous_depth,
+                    # obj_masks=obj_masks,
+                    # guidance_img=guidance_img,
+                    # max_distance=5,
+                    # alpha=0.5,
+                    # kernel_size=5
+                    # )
+
                     refined_depth_map = cv2.normalize(refined_depth_map, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
-                    previous_depth = norm_depth_map
+                    # previous_depth = norm_depth_map
                     cv2.imwrite(os.path.join(output_dir, 'refined_depth.png'), refined_depth_map)
                 ann_frame_idx+=1
 
@@ -273,15 +311,15 @@ def run(cfg, sam2_prompt: Sam2PromptType) -> None:
 
     with initialize(config_path="../configurations"):
         cfg = compose(config_name="sam2_zed_small")
-        # sam2_prompt = Sam2PromptType('g_dino_bbox',user_caption='apple')
+        sam2_prompt = Sam2PromptType('g_dino_bbox',user_caption='pencil')
 
 
         # point_coords = [(390, 200)]
         # labels = [1]  # 1 = foreground, 0 = background
         # sam2_prompt = Sam2PromptType('point', point_coords = point_coords, labels=labels)
 
-        bbox_coords = [(320, 120, 470, 280)]
+        # bbox_coords = [(320, 120, 470, 280)]
         # bbox_coords = [(50, 50, 150, 150), (200, 200, 300, 300)] #! NOTE: 3+ boxes make it really inaccurate
-        sam2_prompt = Sam2PromptType('bbox', bbox_coords = bbox_coords)
+        # sam2_prompt = Sam2PromptType('bbox', bbox_coords = bbox_coords)
 
         run(cfg, sam2_prompt=sam2_prompt)
diff --git a/scripts/utils/depth_utils.py b/scripts/utils/depth_utils.py
@@ -1,9 +1,6 @@
 import cv2
 import numpy as np
 import matplotlib
-
-
-
 from sklearn.linear_model import RANSACRegressor
 from sklearn.preprocessing import PolynomialFeatures
 from sklearn.linear_model import LinearRegression
@@ -224,7 +221,7 @@ def fit_plane_to_segment(depth_map, mask):
         return depth_map
 
     # Fit a plane using polynomial regression
-    poly = PolynomialFeatures(degree=1)  # Linear plane
+    poly = PolynomialFeatures(degree=2)  # Quadratic surface (degree-2 polynomial), no longer a flat plane
     coords = np.column_stack((x, y))
     coords_poly = poly.fit_transform(coords)
     model = RANSACRegressor(LinearRegression(), residual_threshold=2.0)
@@ -238,11 +235,15 @@ def fit_plane_to_segment(depth_map, mask):
 
     # Clamp fitted values to the original depth range
     fitted_depth = np.clip(fitted_depth, depth_map.min(), depth_map.max())
+
+    #! Need to add filtering to the depth map
+    # fitted_depth = preprocess_depth_map(fitted_depth,5)
 
     # Combine fitted depth values with the original depth map
     refined_depth = depth_map.copy()
     refined_depth[mask > 0] = fitted_depth[mask > 0]
 
+
     return refined_depth
 
 
@@ -264,7 +265,8 @@ def refine_depth_with_plane_fitting(depth_map, obj_masks):
             mask = mask[:, :, 0]
         obj_mask = mask.astype(np.uint8)
 
-        if np.any(obj_mask > 0):  # Process only if the mask has valid regions
+    # if np.any(obj_mask > 0):  # Process only if the mask has valid regions
+        if np.count_nonzero(obj_mask) > 100: # require more than 100 mask pixels for a reliable plane estimate
             print(f"Processing Object {obj_id}")
             refined_depth_map = fit_plane_to_segment(refined_depth_map, obj_mask)