Skip to content

Commit

Permalink
fixes resolution error
Browse files Browse the repository at this point in the history
  • Loading branch information
TychoBomer committed Feb 18, 2025
1 parent 6b93097 commit 7a1985f
Show file tree
Hide file tree
Showing 7 changed files with 22 additions and 67 deletions.
2 changes: 1 addition & 1 deletion configurations/sam2_zed_small.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ sam2:
camera:
connection_type: "svo"
serial_number: 0
svo_input_filename: "./output/bottle_aruco_720.svo2"
svo_input_filename: "./output/720/apple_3x_banana_720.svo2"
sender_ip: "127.0.0.1"
port: 30000
depth_mode: "NEURAL"
Expand Down
Binary file modified output/norm_depth.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified output/refined_depth.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified output/test.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion scripts/sam2_track_zed.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ def run(cfg, sam2_prompt: Sam2PromptType) -> None:

with initialize(config_path="../configurations"):
cfg = compose(config_name="sam2_zed_small")
sam2_prompt = Sam2PromptType('g_dino_bbox',user_caption='yellow')
sam2_prompt = Sam2PromptType('g_dino_bbox',user_caption='apple')


# point_coords = [(390, 200)]
Expand Down
14 changes: 9 additions & 5 deletions scripts/sam2_track_zed_video.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
torch.autocast(device_type="cuda", dtype=torch.bfloat16).__enter__()


from wrappers.pyzed_wrapper import pyzed_wrapper as pw
from wrappers.pyzed_wrapper import pyzed_wrapper_v2 as pw
from utils.utils import *
from scripts.utils.depth_utils import (
depth_refinement_RANSAC_plane_fitting,
Expand All @@ -38,7 +38,7 @@ def run(cfg, sam2_prompt: Sam2PromptType, record: bool = False, svo_file: str =

# Build needed models
Log.info("Building models...", tag="building_models")
wrapper = pw.Wrapper("svo" if svo_file else cfg.camera.connection_type)
wrapper = pw.Wrapper(cfg.camera)

# If using playback, set SVO file path
if svo_file:
Expand All @@ -63,15 +63,17 @@ def run(cfg, sam2_prompt: Sam2PromptType, record: bool = False, svo_file: str =

try:
while True:
ts = time.time()
# ts = time.time()

if wrapper.retrieve(is_image=True, is_measure=True):
# Extract images
left_image = wrapper.output_image
depth_map = wrapper.output_measure

# depth_map = wrapper.output_measure

# norm_depth_map = cv2.normalize(depth_map, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
left_image_rgb = cv2.cvtColor(left_image, cv2.COLOR_RGBA2RGB)
height, width, _ = left_image_rgb.shape

# # Display and save
# cv2.imshow("ZED Left", left_image_rgb)
Expand All @@ -80,10 +82,12 @@ def run(cfg, sam2_prompt: Sam2PromptType, record: bool = False, svo_file: str =
cv2.imwrite(f"output/VID_WATCHER_.png", left_image_rgb)
# cv2.imwrite(f"output/depth_{framecount}.png", norm_depth_map)

if framecount > 1000:
if framecount > 500:
break

framecount += 1
# te = time.time()
# print(f"fps: {1/(te-ts)}")

except Exception as e:
Log.error(f"An error occurred: {e}", tag="runtime_error")
Expand Down
71 changes: 11 additions & 60 deletions scripts/wrappers/pyzed_wrapper/pyzed_wrapper_v2.py
Original file line number Diff line number Diff line change
@@ -1,39 +1,24 @@
"""
Based on Stereolabs ZED API
https://github.com/stereolabs/zed-examples/
https://www.stereolabs.com/docs/api/python/
PNG export of the last grabbed frame is available via `export_png` (uses sl.Mat.write);
for other formats, write the numpy arrays yourself to avoid an OpenCV dependency here.
TODO: Incorporate proper logging on all errors and prints.
"""

import pyzed.sl as sl


class Wrapper:
"""
Wrapper for ZED API (v3.6) with settings loaded from Hydra YAML.
"""

def __init__(self, camera_cfg: dict):
"""
Initializes the ZED camera with settings from Hydra YAML.
=INPUT=
camera_cfg - Dict from Hydra (cfg.camera)
"""

self.camera_cfg = camera_cfg
self.initial_parameters = sl.InitParameters()
self.runtime_parameters = sl.RuntimeParameters()
self.streaming_parameters = sl.StreamingParameters()
self.recording_parameters = sl.RecordingParameters()
self.camera = sl.Camera()
self.mat_image = sl.Mat()
self.mat_measure = sl.Mat()
self.grab_status = None
self.timestamp_nanoseconds = 0

# Set camera input type
if camera_cfg["connection_type"] == "id":
self.initial_parameters.set_from_camera_id(camera_cfg.get("id", 0))
Expand All @@ -43,166 +28,132 @@ def __init__(self, camera_cfg: dict):
self.initial_parameters.set_from_svo_file(camera_cfg["svo_input_filename"])
elif camera_cfg["connection_type"] == "stream":
self.initial_parameters.set_from_stream(camera_cfg["sender_ip"], camera_cfg["port"])

# Set additional camera parameters
self.initial_parameters.depth_mode = getattr(sl.DEPTH_MODE, camera_cfg["depth_mode"])
self.initial_parameters.camera_resolution = getattr(sl.RESOLUTION, camera_cfg["resolution"])
self.initial_parameters.coordinate_units = getattr(sl.UNIT, camera_cfg["coordinate_units"])
self.initial_parameters.camera_fps = camera_cfg["camera_fps"]
self.initial_parameters.depth_minimum_distance = camera_cfg["depth_min_distance"]
self.initial_parameters.depth_maximum_distance = camera_cfg["depth_max_distance"]


# Set runtime parameters
self.runtime_parameters.enable_fill_mode = camera_cfg["enable_fill_mode"]

# Numpy output arrays
self.output_image = None
self.output_measure = None

def get_intrinsic(self):
    """
    Fetch the raw (unrectified) intrinsics of both sensors.

    =OUTPUT=
    (intrinsic_left, intrinsic_right) - CameraParameters (see Stereolabs API)
    """
    # Single information query, then pull both cameras' raw calibration.
    calib = (
        self.camera.get_camera_information()
        .camera_configuration.calibration_parameters_raw
    )
    return calib.left_cam, calib.right_cam

def open_input_source(self):
    """
    Open the camera, stream, or file connection.

    The open status is checked *before* the camera object is used: the
    original queried `get_camera_information()` and allocated Mats even
    when `open()` had failed, so errors surfaced as confusing secondary
    failures instead of the real open error.

    =OUTPUT=
    True on success.
    =RAISES=
    Exception if the input source cannot be opened.
    """
    status = self.camera.open(self.initial_parameters)
    if status != sl.ERROR_CODE.SUCCESS:
        raise Exception(f"Failed to open input source '{self.camera_cfg['connection_type']}': {status}")

    # Query the resolution the SDK actually delivers (may differ from the
    # requested one, e.g. for SVO playback).
    camera_info = self.camera.get_camera_information()
    self.width = camera_info.camera_configuration.resolution.width
    self.height = camera_info.camera_configuration.resolution.height
    print(f"🔍 Actual Camera Resolution: {self.width}x{self.height}")  # Debugging

    # Pre-allocate the Mats at the reported resolution.
    self.mat_image = sl.Mat(self.width, self.height)
    self.mat_measure = sl.Mat(self.width, self.height)

    return True

def close_input_source(self):
    """
    Release cached frame arrays, free Mat CPU memory, and close the camera.

    =OUTPUT=
    True when the connection has been closed.
    """
    # Drop the numpy views first so nothing references the freed Mats.
    self.output_image = None
    self.output_measure = None

    for mat in (self.mat_image, self.mat_measure):
        mat.free(sl.MEM.CPU)

    self.camera.close()
    return True

def start_stream(self):
    """
    Begin streaming data. Must be called after open_input_source.

    =OUTPUT=
    True on success.
    =RAISES=
    Exception if the camera is closed or streaming cannot be enabled.
    """
    if not self.camera.is_opened():
        raise Exception("Camera is not opened. Cannot start streaming.")

    result = self.camera.enable_streaming(self.streaming_parameters)
    if result != sl.ERROR_CODE.SUCCESS:
        raise Exception(f"Failed to start stream: {result}")
    return True

def stop_stream(self):
    """
    Disable network streaming.

    =OUTPUT=
    True always.
    """
    self.camera.disable_streaming()
    return True

def start_recording(self, filename="output.svo"):
    """
    Begin writing captured frames into an SVO file.

    =INPUT=
    filename - string : target path for the .svo recording
    =OUTPUT=
    True on success.
    =RAISES=
    Exception if the camera is closed or recording cannot be enabled.
    """
    if not self.camera.is_opened():
        raise Exception("Camera is not opened. Cannot start recording.")

    self.recording_parameters.video_filename = filename
    result = self.camera.enable_recording(self.recording_parameters)
    if result != sl.ERROR_CODE.SUCCESS:
        raise Exception(f"Failed to start recording: {result}")
    return True

def stop_recording(self):
    """
    Disable SVO recording.

    =OUTPUT=
    True always.
    """
    self.camera.disable_recording()
    return True

def retrieve(self, is_image=True, is_measure=False):
    """
    Grab the next frame and pull the requested data into numpy arrays.

    =INPUT=
    is_image   - bool : fetch the left view into self.output_image
                        (and update self.timestamp_nanoseconds)
    is_measure - bool : fetch the depth map (DEPTH_U16_MM, i.e. uint16
                        millimetres) into self.output_measure
    =OUTPUT=
    True on a successful grab, False otherwise.
    =NOTES=
    - It is likely more efficient to obtain SIDE_BY_SIDE and then split the
      array rather than calling retrieve_image twice for left and right.
    - Obtaining only IMAGE_LEFT is likely more efficient than SIDE_BY_SIDE.
    TODO: Optimize by using GPU and reducing unnecessary CPU-GPU copies.
    """
    # Guard clause: bail out early on a failed grab.
    self.grab_status = self.camera.grab(self.runtime_parameters)
    if self.grab_status != sl.ERROR_CODE.SUCCESS:
        print("Failed to grab camera frame.")
        return False

    if is_image:
        self.camera.retrieve_image(self.mat_image, sl.VIEW.LEFT)
        self.output_image = self.mat_image.get_data()
        self.timestamp_nanoseconds = self.mat_image.timestamp.data_ns

    if is_measure:
        self.camera.retrieve_measure(self.mat_measure, sl.MEASURE.DEPTH_U16_MM)
        self.output_measure = self.mat_measure.get_data()

    return True

def export_png(self, filename, is_image=True, is_measure=False):
    """
    Write the most recently retrieved frame to disk as a PNG image.

    =INPUT=
    filename   - string : path name + file name + .png extension
    is_image   - bool   : write the image Mat
    is_measure - bool   : write the measure (depth) Mat
    =OUTPUT=
    False if the last grab failed, True otherwise.
    """
    # Nothing valid to export if the last grab did not succeed.
    if self.grab_status != sl.ERROR_CODE.SUCCESS:
        return False

    if is_image:
        self.mat_image.write(filename)
    if is_measure:
        self.mat_measure.write(filename)

    return True

def depth_at_xy(self, xpos: int = 0, ypos: int = 0):
    """
    Get the depth value at a specific pixel coordinate.

    =INPUT=
    xpos - int : X-coordinate of the pixel
    ypos - int : Y-coordinate of the pixel
    =OUTPUT=
    Depth value at (xpos, ypos), or None when no depth measure has been
    retrieved yet.
    """
    # NOTE(review): numpy images are indexed [row, col] == [y, x], so
    # [xpos, ypos] looks transposed — confirm intended order with callers.
    if self.output_measure is not None:
        return self.output_measure[xpos, ypos]
    return None  # removed duplicated, unreachable second `return None`

0 comments on commit 7a1985f

Please sign in to comment.