Skip to content

Commit

Permalink
fixes resolution error
Browse files Browse the repository at this point in the history
  • Loading branch information
TychoBomer committed Feb 18, 2025
1 parent 6b93097 commit 7a1985f
Show file tree
Hide file tree
Showing 7 changed files with 22 additions and 67 deletions.
2 changes: 1 addition & 1 deletion configurations/sam2_zed_small.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ sam2:
camera:
connection_type: "svo"
serial_number: 0
svo_input_filename: "./output/bottle_aruco_720.svo2"
svo_input_filename: "./output/720/apple_3x_banana_720.svo2"
sender_ip: "127.0.0.1"
port: 30000
depth_mode: "NEURAL"
Expand Down
Binary file modified output/norm_depth.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified output/refined_depth.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified output/test.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion scripts/sam2_track_zed.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ def run(cfg, sam2_prompt: Sam2PromptType) -> None:

with initialize(config_path="../configurations"):
cfg = compose(config_name="sam2_zed_small")
sam2_prompt = Sam2PromptType('g_dino_bbox',user_caption='yellow')
sam2_prompt = Sam2PromptType('g_dino_bbox',user_caption='apple')


# point_coords = [(390, 200)]
Expand Down
14 changes: 9 additions & 5 deletions scripts/sam2_track_zed_video.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
torch.autocast(device_type="cuda", dtype=torch.bfloat16).__enter__()


from wrappers.pyzed_wrapper import pyzed_wrapper as pw
from wrappers.pyzed_wrapper import pyzed_wrapper_v2 as pw
from utils.utils import *
from scripts.utils.depth_utils import (
depth_refinement_RANSAC_plane_fitting,
Expand All @@ -38,7 +38,7 @@ def run(cfg, sam2_prompt: Sam2PromptType, record: bool = False, svo_file: str =

# Build needed models
Log.info("Building models...", tag="building_models")
wrapper = pw.Wrapper("svo" if svo_file else cfg.camera.connection_type)
wrapper = pw.Wrapper(cfg.camera)

# If using playback, set SVO file path
if svo_file:
Expand All @@ -63,15 +63,17 @@ def run(cfg, sam2_prompt: Sam2PromptType, record: bool = False, svo_file: str =

try:
while True:
ts = time.time()
# ts = time.time()

if wrapper.retrieve(is_image=True, is_measure=True):
# Extract images
left_image = wrapper.output_image
depth_map = wrapper.output_measure

# depth_map = wrapper.output_measure

# norm_depth_map = cv2.normalize(depth_map, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
left_image_rgb = cv2.cvtColor(left_image, cv2.COLOR_RGBA2RGB)
height, width, _ = left_image_rgb.shape

# # Display and save
# cv2.imshow("ZED Left", left_image_rgb)
Expand All @@ -80,10 +82,12 @@ def run(cfg, sam2_prompt: Sam2PromptType, record: bool = False, svo_file: str =
cv2.imwrite(f"output/VID_WATCHER_.png", left_image_rgb)
# cv2.imwrite(f"output/depth_{framecount}.png", norm_depth_map)

if framecount > 1000:
if framecount > 500:
break

framecount += 1
# te = time.time()
# print(f"fps: {1/(te-ts)}")

except Exception as e:
Log.error(f"An error occurred: {e}", tag="runtime_error")
Expand Down
71 changes: 11 additions & 60 deletions scripts/wrappers/pyzed_wrapper/pyzed_wrapper_v2.py
Original file line number Diff line number Diff line change
@@ -1,39 +1,24 @@
"""
Based on Stereolabs ZED API
https://github.com/stereolabs/zed-examples/
https://www.stereolabs.com/docs/api/python/
PNG export of the last grabbed frame is available via `export_png` (uses sl.Mat.write);
for other formats, write the numpy arrays yourself to avoid an OpenCV dependency here.
TODO: Incorporate proper logging on all errors and prints.
"""

import pyzed.sl as sl


class Wrapper:
"""
Wrapper for ZED API (v3.6) with settings loaded from Hydra YAML.
"""

def __init__(self, camera_cfg: dict):
"""
Initializes the ZED camera with settings from Hydra YAML.
=INPUT=
camera_cfg - Dict from Hydra (cfg.camera)
"""

self.camera_cfg = camera_cfg
self.initial_parameters = sl.InitParameters()
self.runtime_parameters = sl.RuntimeParameters()
self.streaming_parameters = sl.StreamingParameters()
self.recording_parameters = sl.RecordingParameters()
self.camera = sl.Camera()
self.mat_image = sl.Mat()
self.mat_measure = sl.Mat()
self.grab_status = None
self.timestamp_nanoseconds = 0

# Set camera input type
if camera_cfg["connection_type"] == "id":
self.initial_parameters.set_from_camera_id(camera_cfg.get("id", 0))
Expand All @@ -43,166 +28,132 @@ def __init__(self, camera_cfg: dict):
self.initial_parameters.set_from_svo_file(camera_cfg["svo_input_filename"])
elif camera_cfg["connection_type"] == "stream":
self.initial_parameters.set_from_stream(camera_cfg["sender_ip"], camera_cfg["port"])

# Set additional camera parameters
self.initial_parameters.depth_mode = getattr(sl.DEPTH_MODE, camera_cfg["depth_mode"])
self.initial_parameters.camera_resolution = getattr(sl.RESOLUTION, camera_cfg["resolution"])
self.initial_parameters.coordinate_units = getattr(sl.UNIT, camera_cfg["coordinate_units"])
self.initial_parameters.camera_fps = camera_cfg["camera_fps"]
self.initial_parameters.depth_minimum_distance = camera_cfg["depth_min_distance"]
self.initial_parameters.depth_maximum_distance = camera_cfg["depth_max_distance"]


# Set runtime parameters
self.runtime_parameters.enable_fill_mode = camera_cfg["enable_fill_mode"]

# Numpy output arrays
self.output_image = None
self.output_measure = None

def get_intrinsic(self):
    """
    Fetch the raw (unrectified) intrinsics of both sensors.

    =OUTPUT=
    (intrinsic_left, intrinsic_right) - CameraParameters (see Stereolabs API)
    """
    # Single information query, then pull both cameras' raw calibration.
    calib = (
        self.camera.get_camera_information()
        .camera_configuration.calibration_parameters_raw
    )
    return calib.left_cam, calib.right_cam

def open_input_source(self):
    """
    Open the camera, stream, or file connection.

    The open status is checked *before* the camera object is used: the
    original queried `get_camera_information()` and allocated Mats even
    when `open()` had failed, so errors surfaced as confusing secondary
    failures instead of the real open error.

    =OUTPUT=
    True on success.
    =RAISES=
    Exception if the input source cannot be opened.
    """
    status = self.camera.open(self.initial_parameters)
    if status != sl.ERROR_CODE.SUCCESS:
        raise Exception(f"Failed to open input source '{self.camera_cfg['connection_type']}': {status}")

    # Query the resolution the SDK actually delivers (may differ from the
    # requested one, e.g. for SVO playback).
    camera_info = self.camera.get_camera_information()
    self.width = camera_info.camera_configuration.resolution.width
    self.height = camera_info.camera_configuration.resolution.height
    print(f"🔍 Actual Camera Resolution: {self.width}x{self.height}")  # Debugging

    # Pre-allocate the Mats at the reported resolution.
    self.mat_image = sl.Mat(self.width, self.height)
    self.mat_measure = sl.Mat(self.width, self.height)

    return True

def close_input_source(self):
    """
    Release cached frame arrays, free Mat CPU memory, and close the camera.

    =OUTPUT=
    True when the connection has been closed.
    """
    # Drop the numpy views first so nothing references the freed Mats.
    self.output_image = None
    self.output_measure = None

    for mat in (self.mat_image, self.mat_measure):
        mat.free(sl.MEM.CPU)

    self.camera.close()
    return True

def start_stream(self):
    """
    Begin streaming data. Must be called after open_input_source.

    =OUTPUT=
    True on success.
    =RAISES=
    Exception if the camera is closed or streaming cannot be enabled.
    """
    if not self.camera.is_opened():
        raise Exception("Camera is not opened. Cannot start streaming.")

    result = self.camera.enable_streaming(self.streaming_parameters)
    if result != sl.ERROR_CODE.SUCCESS:
        raise Exception(f"Failed to start stream: {result}")
    return True

def stop_stream(self):
    """
    Disable network streaming.

    =OUTPUT=
    True always.
    """
    self.camera.disable_streaming()
    return True

def start_recording(self, filename="output.svo"):
    """
    Begin writing captured frames into an SVO file.

    =INPUT=
    filename - string : target path for the .svo recording
    =OUTPUT=
    True on success.
    =RAISES=
    Exception if the camera is closed or recording cannot be enabled.
    """
    if not self.camera.is_opened():
        raise Exception("Camera is not opened. Cannot start recording.")

    self.recording_parameters.video_filename = filename
    result = self.camera.enable_recording(self.recording_parameters)
    if result != sl.ERROR_CODE.SUCCESS:
        raise Exception(f"Failed to start recording: {result}")
    return True

def stop_recording(self):
    """
    Disable SVO recording.

    =OUTPUT=
    True always.
    """
    self.camera.disable_recording()
    return True

def retrieve(self, is_image=True, is_measure=False):
    """
    Grab the next frame and pull the requested data into numpy arrays.

    =INPUT=
    is_image   - bool : fetch the left view into self.output_image
                        (and update self.timestamp_nanoseconds)
    is_measure - bool : fetch the depth map (DEPTH_U16_MM, i.e. uint16
                        millimetres) into self.output_measure
    =OUTPUT=
    True on a successful grab, False otherwise.
    =NOTES=
    - It is likely more efficient to obtain SIDE_BY_SIDE and then split the
      array rather than calling retrieve_image twice for left and right.
    - Obtaining only IMAGE_LEFT is likely more efficient than SIDE_BY_SIDE.
    TODO: Optimize by using GPU and reducing unnecessary CPU-GPU copies.
    """
    # Guard clause: bail out early on a failed grab.
    self.grab_status = self.camera.grab(self.runtime_parameters)
    if self.grab_status != sl.ERROR_CODE.SUCCESS:
        print("Failed to grab camera frame.")
        return False

    if is_image:
        self.camera.retrieve_image(self.mat_image, sl.VIEW.LEFT)
        self.output_image = self.mat_image.get_data()
        self.timestamp_nanoseconds = self.mat_image.timestamp.data_ns

    if is_measure:
        self.camera.retrieve_measure(self.mat_measure, sl.MEASURE.DEPTH_U16_MM)
        self.output_measure = self.mat_measure.get_data()

    return True

def export_png(self, filename, is_image=True, is_measure=False):
    """
    Write the most recently retrieved frame to disk as a PNG image.

    =INPUT=
    filename   - string : path name + file name + .png extension
    is_image   - bool   : write the image Mat
    is_measure - bool   : write the measure (depth) Mat
    =OUTPUT=
    False if the last grab failed, True otherwise.
    """
    # Nothing valid to export if the last grab did not succeed.
    if self.grab_status != sl.ERROR_CODE.SUCCESS:
        return False

    if is_image:
        self.mat_image.write(filename)
    if is_measure:
        self.mat_measure.write(filename)

    return True

def depth_at_xy(self, xpos: int = 0, ypos: int = 0):
    """
    Get the depth value at a specific pixel coordinate.

    =INPUT=
    xpos - int : X-coordinate of the pixel
    ypos - int : Y-coordinate of the pixel
    =OUTPUT=
    Depth value at (xpos, ypos), or None when no depth measure has been
    retrieved yet.
    """
    # NOTE(review): numpy images are indexed [row, col] == [y, x], so
    # [xpos, ypos] looks transposed — confirm intended order with callers.
    if self.output_measure is not None:
        return self.output_measure[xpos, ypos]
    return None  # removed duplicated, unreachable second `return None`

0 comments on commit 7a1985f

Please sign in to comment.