Testing model on ONNX format
mfclabber committed Sep 21, 2024
1 parent 8ce795e commit 37eb890
Showing 8 changed files with 303 additions and 65 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -9,7 +9,7 @@
</p>

## TODO:
- [ ] Testing model on ONNX format
- [X] Testing model on ONNX format
- [ ] Writing inference mode on TensorRT
- [ ] Create full README.md
- [ ] Add tracking algorithm (possibly [StrongSort](https://github.com/dyhBUPT/StrongSORT?tab=readme-ov-file))
Binary file modified output_track.mp4
Binary file not shown.
21 changes: 21 additions & 0 deletions scripts/convert2onnx.py
@@ -0,0 +1,21 @@
import onnx

import torch
import torch.nn as nn
import torch.nn.functional as F

from model import YOLOv9


if __name__ == "__main__":

torch_model = YOLOv9(path2weights="../weights/best.pt")  # path2weights is required by YOLOv9.__init__

# torch_input = torch.randn(1, 1, 1280, 640)
# onnx_program = torch.onnx.dynamo_export(torch_model, torch_input)

# onnx_program.save("../weights/yolov9t.onnx")

torch_model.export2onnx()
# onnx_model = onnx.load("/home/mfclabber/fs_cones_detection&monodepth/weights/best.onnx")
# onnx.checker.check_model(onnx_model)
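
Since the checker and runtime calls above are commented out, a minimal post-export sanity check is sketched below; the ../weights/best.onnx path and the 640x640 input shape are assumptions inferred from the export call, not fixed by this commit.

import onnx
import onnxruntime as ort
import numpy as np

# Structural validation: raises if the exported graph is malformed.
onnx_model = onnx.load("../weights/best.onnx")
onnx.checker.check_model(onnx_model)

# Smoke test: run one dummy frame through onnxruntime.
session = ort.InferenceSession("../weights/best.onnx",
                               providers=["CPUExecutionProvider"])
input_name = session.get_inputs()[0].name
dummy = np.random.rand(1, 3, 640, 640).astype(np.float32)
outputs = session.run(None, {input_name: dummy})
print([o.shape for o in outputs])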
180 changes: 130 additions & 50 deletions scripts/main.py
@@ -18,6 +18,9 @@
from ultralytics import YOLO
from roboflow import Roboflow

import onnx
import onnxruntime as ort

import torch
import torchvision
from torchvision import transforms, datasets
@@ -52,68 +55,145 @@
if __name__ == "__main__":

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = YOLOv9().to(device)
encoder, depth_decoder, loaded_dict_enc = get_mono_640x192_model()

# image = np.array(Image.open(test_image_path_list[i])).transpose(2, 0, 1)[:3]
inference_mode = "on"  # set to "onnx" to exercise the exported model below

# disp_resized_np, pred_image = prediction(image,
# model,
# encoder,
# depth_decoder,
# loaded_dict_enc)


video_path = 'videos/track.mp4'
cap = cv2.VideoCapture(video_path)
if inference_mode != "onnx":

model = YOLOv9(path2weights="weights/best.pt").to(device)
encoder, depth_decoder, loaded_dict_enc = get_mono_640x192_model()

if not cap.isOpened():
print("Error opening video stream or file")
# image = np.array(Image.open(test_image_path_list[i])).transpose(2, 0, 1)[:3]

frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
# disp_resized_np, pred_image = prediction(image,
# model,
# encoder,
# depth_decoder,
# loaded_dict_enc)


video_path = 'videos/track.mp4'
cap = cv2.VideoCapture(video_path)

print(f"Размер видео: {frame_width}x{frame_height}, FPS: {fps}, Количество кадров: {total_frames}")
if not cap.isOpened():
print("Error opening video stream or file")

output_video_path = 'output_track.mp4'
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

start_time = time.time()
num_frames = 0
frame_count = 0
print(f"Размер видео: {frame_width}x{frame_height}, FPS: {fps}, Количество кадров: {total_frames}")

while cap.isOpened():
ret, frame = cap.read()
output_video_path = 'output_track.mp4'
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

frame_count += 1
num_frames += 1
current_fps = calculate_fps(start_time, num_frames)

if frame_count % 5 == 0 or frame_count == 1:
if ret:
disp_resized_np, annotated_frame = process_frame(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR),
model,
encoder,
depth_decoder,
loaded_dict_enc)
cv2.putText(annotated_frame, f"FPS: {current_fps:.2f}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
cv2.imshow('Frame', cv2.cvtColor(annotated_frame, cv2.COLOR_RGB2BGR))
start_time = time.time()
num_frames = 0
frame_count = 0

while cap.isOpened():
ret, frame = cap.read()

frame_count += 1
num_frames += 1
current_fps = calculate_fps(start_time, num_frames)

if cv2.waitKey(25) & 0xFF == ord('q'):
if frame_count % 5 == 0 or frame_count == 1:
if ret:
disp_resized_np, annotated_frame = process_frame(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR),
model,
encoder,
depth_decoder,
loaded_dict_enc)
cv2.putText(annotated_frame, f"FPS: {current_fps:.2f}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
cv2.imshow('Frame', cv2.cvtColor(annotated_frame, cv2.COLOR_RGB2BGR))

if cv2.waitKey(25) & 0xFF == ord('q'):
break
else:
break
else:
break
else:
cv2.imshow('Frame', cv2.cvtColor(annotated_frame, cv2.COLOR_RGB2BGR))
else:
cv2.imshow('Frame', cv2.cvtColor(annotated_frame, cv2.COLOR_RGB2BGR))

out.write(cv2.cvtColor(annotated_frame, cv2.COLOR_RGB2BGR))

# time.sleep(1 / fps / 5)

cap.release()
out.release()

cv2.destroyAllWindows()

else:

# TODO: onnx_model

model = YOLOv9(path2weights="weights/best.onnx")
encoder, depth_decoder, loaded_dict_enc = get_mono_640x192_model()

# image = np.array(Image.open(test_image_path_list[i])).transpose(2, 0, 1)[:3]

# disp_resized_np, pred_image = prediction(image,
# model,
# encoder,
# depth_decoder,
# loaded_dict_enc)

out.write(cv2.cvtColor(annotated_frame, cv2.COLOR_RGB2BGR))

# time.sleep(1 / fps / 5)
video_path = 'videos/track.mp4'
cap = cv2.VideoCapture(video_path)

if not cap.isOpened():
print("Error opening video stream or file")

frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

print(f"Размер видео: {frame_width}x{frame_height}, FPS: {fps}, Количество кадров: {total_frames}")

output_video_path = 'output_track.mp4'
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

start_time = time.time()
num_frames = 0
frame_count = 0

while cap.isOpened():
ret, frame = cap.read()

frame_count += 1
num_frames += 1
current_fps = calculate_fps(start_time, num_frames)

if frame_count % 5 == 0 or frame_count == 1:
if ret:
disp_resized_np, annotated_frame = process_frame(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR),
model,
encoder,
depth_decoder,
loaded_dict_enc)
cv2.putText(annotated_frame, f"FPS: {current_fps:.2f}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
cv2.imshow('Frame', cv2.cvtColor(annotated_frame, cv2.COLOR_RGB2BGR))

if cv2.waitKey(25) & 0xFF == ord('q'):
break
else:
break
else:
cv2.imshow('Frame', cv2.cvtColor(annotated_frame, cv2.COLOR_RGB2BGR))

out.write(cv2.cvtColor(annotated_frame, cv2.COLOR_RGB2BGR))

# time.sleep(1 / fps / 5)

cap.release()
out.release()

cv2.destroyAllWindows()

cap.release()
out.release()

cv2.destroyAllWindows()
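
The ONNX branch above still routes inference through the Ultralytics wrapper, which accepts .onnx weights directly. If the wrapper were dropped, a bare onnxruntime loop over the same video could look roughly like the sketch below; the letterbox-free resize and the missing output decoding are simplifications, not the repository's code.

import cv2
import numpy as np
import onnxruntime as ort

session = ort.InferenceSession("weights/best.onnx",
                               providers=["CUDAExecutionProvider", "CPUExecutionProvider"])
input_name = session.get_inputs()[0].name

cap = cv2.VideoCapture("videos/track.mp4")
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    # Naive resize to the export size; real code would letterbox to preserve aspect ratio.
    blob = cv2.resize(frame, (640, 640)).astype(np.float32) / 255.0
    blob = blob.transpose(2, 0, 1)[None]  # HWC -> NCHW, add batch dim
    preds = session.run(None, {input_name: blob})[0]
    # preds still needs confidence filtering and NMS before boxes can be drawn.
cap.release()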
36 changes: 31 additions & 5 deletions scripts/model.py
@@ -1,17 +1,30 @@
import torch
import numpy as np
from pathlib import Path
from ultralytics import YOLO
from typing import List, Dict

from utils import LABEL2ID, ID2LABEL


ID2LABEL = dict([
(0, "yellow_cone"),
(2, "blue_cone"),
(3, "large_orange_cone"),
(1, "orange_cone"),
(4, "unknown_cone")
])

LABEL2ID = {v: k for k, v in ID2LABEL.items()}  # invert ID2LABEL


class YOLOv9(torch.nn.Module):
def __init__(self, num_classes: int=4) -> None:
def __init__(self, path2weights: Path, num_classes: int=4) -> None:
super().__init__()

self.model = YOLO("../weights/best.pt")
self.path2weights = path2weights
self.model = YOLO(f"{path2weights}")

self.LABEL2LABEL = dict([
("unknown_cone", "blue_cone"),
@@ -22,11 +35,17 @@ def __init__(self, num_classes: int=4) -> None:
])

def predict(self, X: torch.Tensor, confidence=40, overlap=30) -> torch.Tensor:
results = self.model.predict(X.transpose(1, 2, 0))

if str(self.path2weights)[-2:] != "pt":  # non-.pt weights (e.g. ONNX): request GPU device 0 explicitly
results = self.model.predict(source=X.transpose(1, 2, 0), device=0)
else:
results = self.model.predict(source=X.transpose(1, 2, 0))

bboxes = results[0].boxes.data[:, :4]
labels_ = results[0].boxes.cls
scores = results[0].boxes.conf
labels = np.zeros_like(labels_.cpu())

for i, label in enumerate(labels_):
label = int(label.item())
labels[i] = LABEL2ID[self.LABEL2LABEL[ID2LABEL[label]]]
Expand All @@ -37,4 +56,11 @@ def predict(self, X: torch.Tensor, confidence=40, overlap=30) -> torch.Tensor:

# To calculate the loss function
def forward(self, images: List[torch.Tensor], annotation: List[Dict[str, torch.Tensor]]) -> Dict[str, int]:
return self.model(images, annotation)
return self.model(images, annotation)

def export2onnx(self, frame_size=640):
self.model.export(format="onnx", imgsz=frame_size)

return self.model
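
Ultralytics writes the exported .onnx next to the source weights, so export2onnx on weights/best.pt should produce weights/best.onnx, which is the path main.py loads in its ONNX branch. A usage sketch (paths are assumptions):

model = YOLOv9(path2weights="weights/best.pt")
model.export2onnx(frame_size=640)  # writes weights/best.onnx via Ultralytics

Ultralytics export also accepts further options, e.g. dynamic=True for dynamic input shapes, if variable frame sizes are needed later.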
18 changes: 9 additions & 9 deletions scripts/utils.py
@@ -196,11 +196,11 @@ def get_cone_distances(detected_cones, depth_map):
res = get_cone_distances(bboxes, 1 / disp_resized_np)

return disp_resized_np, show_image_with_objects(image,
bboxes,
labels,
scores,
depths_value=res,
threshold_score=threshold_score)
bboxes,
labels,
scores,
depths_value=res,
threshold_score=threshold_score)


def process_frame(frame,
@@ -210,10 +210,10 @@ def process_frame(frame,
loaded_dict_enc):

disp_resized_np, annotated_frame = prediction(np.array(frame).transpose(2, 0, 1)[:3],
model,
encoder,
depth_decoder,
loaded_dict_enc)
model,
encoder,
depth_decoder,
loaded_dict_enc)

return disp_resized_np, np.array(annotated_frame)
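
calculate_fps, called from scripts/main.py above, is not part of this diff. A minimal implementation consistent with how it is invoked (a start timestamp and a frame counter in, an average FPS out) might be:

import time

def calculate_fps(start_time: float, num_frames: int) -> float:
    # Average FPS since start_time; guard against a zero-length interval.
    elapsed = time.time() - start_time
    return num_frames / elapsed if elapsed > 0 else 0.0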

