From e6d39e6893660a2d3dace24123b6070e112a69f0 Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Thu, 11 Jul 2024 10:45:58 +0800
Subject: [PATCH 01/85] feat(tensorrt_bevdet): add new 3d object detection
 method

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 .../detection/detection.launch.xml            |  29 +
 .../detector/camera_bev_detector.launch.xml   |  43 +
 .../launch/perception.launch.xml              |   2 +
 perception/tensorrt_bevdet/CMakeLists.txt     | 103 +++
 perception/tensorrt_bevdet/README.md          |  40 +
 .../tensorrt_bevdet/config/bevdet.param.yaml  |  11 +
 .../config/bevdet_r50_4dlongterm_depth.yaml   |  51 ++
 .../tensorrt_bevdet/config/cams_param.yaml    |  99 +++
 .../include/alignbev_plugin.hpp               |  99 +++
 perception/tensorrt_bevdet/include/bevdet.hpp | 239 ++++++
 .../tensorrt_bevdet/include/bevdet_node.hpp   | 140 ++++
 .../include/bevpool_plugin.hpp                | 103 +++
 perception/tensorrt_bevdet/include/common.hpp | 135 ++++
 .../include/cpu_jpegdecoder.hpp               |   9 +
 perception/tensorrt_bevdet/include/data.hpp   | 118 +++
 .../include/gatherbev_plugin.hpp              |  99 +++
 .../tensorrt_bevdet/include/iou3d_nms.hpp     |  60 ++
 .../tensorrt_bevdet/include/nvjpegdecoder.hpp | 101 +++
 .../tensorrt_bevdet/include/postprocess.hpp   |  68 ++
 .../tensorrt_bevdet/include/preprocess.hpp    |   9 +
 .../include/preprocess_plugin.hpp             | 103 +++
 .../launch/tensorrt_bevdet.launch.xml         |  40 +
 perception/tensorrt_bevdet/package.xml        |  40 +
 .../tensorrt_bevdet/src/alignbev_plugin.cu    | 334 ++++++++
 perception/tensorrt_bevdet/src/bevdet.cpp     | 757 ++++++++++++++++++
 .../tensorrt_bevdet/src/bevdet_node.cpp       | 259 ++++++
 .../tensorrt_bevdet/src/bevpool_plugin.cu     | 372 +++++++++
 .../tensorrt_bevdet/src/cpu_jpegdecoder.cpp   |  73 ++
 perception/tensorrt_bevdet/src/data.cpp       | 195 +++++
 .../tensorrt_bevdet/src/gatherbev_plugin.cu   | 296 +++++++
 perception/tensorrt_bevdet/src/iou3d_nms.cu   | 596 ++++++++++++++
 .../tensorrt_bevdet/src/nvjpegdecoder.cpp     | 268 +++++++
 perception/tensorrt_bevdet/src/postprocess.cu | 223 ++++++
 perception/tensorrt_bevdet/src/preprocess.cu  |  22 +
 .../tensorrt_bevdet/src/preprocess_plugin.cu  | 357 +++++++++
 35 files changed, 5493 insertions(+)
 create mode 100644 launch/tier4_perception_launch/launch/object_recognition/detection/detector/camera_bev_detector.launch.xml
 create mode 100644 perception/tensorrt_bevdet/CMakeLists.txt
 create mode 100644 perception/tensorrt_bevdet/README.md
 create mode 100644 perception/tensorrt_bevdet/config/bevdet.param.yaml
 create mode 100644 perception/tensorrt_bevdet/config/bevdet_r50_4dlongterm_depth.yaml
 create mode 100644 perception/tensorrt_bevdet/config/cams_param.yaml
 create mode 100644 perception/tensorrt_bevdet/include/alignbev_plugin.hpp
 create mode 100644 perception/tensorrt_bevdet/include/bevdet.hpp
 create mode 100644 perception/tensorrt_bevdet/include/bevdet_node.hpp
 create mode 100644 perception/tensorrt_bevdet/include/bevpool_plugin.hpp
 create mode 100644 perception/tensorrt_bevdet/include/common.hpp
 create mode 100644 perception/tensorrt_bevdet/include/cpu_jpegdecoder.hpp
 create mode 100644 perception/tensorrt_bevdet/include/data.hpp
 create mode 100644 perception/tensorrt_bevdet/include/gatherbev_plugin.hpp
 create mode 100644 perception/tensorrt_bevdet/include/iou3d_nms.hpp
 create mode 100644 perception/tensorrt_bevdet/include/nvjpegdecoder.hpp
 create mode 100644 perception/tensorrt_bevdet/include/postprocess.hpp
 create mode 100644 perception/tensorrt_bevdet/include/preprocess.hpp
 create mode 100644 perception/tensorrt_bevdet/include/preprocess_plugin.hpp
 create mode 100644 perception/tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml
 create mode 100644 perception/tensorrt_bevdet/package.xml
 create mode 100644 perception/tensorrt_bevdet/src/alignbev_plugin.cu
 create mode 100644 perception/tensorrt_bevdet/src/bevdet.cpp
 create mode 100644 perception/tensorrt_bevdet/src/bevdet_node.cpp
 create mode 100644 perception/tensorrt_bevdet/src/bevpool_plugin.cu
 create mode 100644 perception/tensorrt_bevdet/src/cpu_jpegdecoder.cpp
 create mode 100644 perception/tensorrt_bevdet/src/data.cpp
 create mode 100644 perception/tensorrt_bevdet/src/gatherbev_plugin.cu
 create mode 100644 perception/tensorrt_bevdet/src/iou3d_nms.cu
 create mode 100644 perception/tensorrt_bevdet/src/nvjpegdecoder.cpp
 create mode 100644 perception/tensorrt_bevdet/src/postprocess.cu
 create mode 100644 perception/tensorrt_bevdet/src/preprocess.cu
 create mode 100644 perception/tensorrt_bevdet/src/preprocess_plugin.cu

diff --git a/launch/tier4_perception_launch/launch/object_recognition/detection/detection.launch.xml b/launch/tier4_perception_launch/launch/object_recognition/detection/detection.launch.xml
index 8f64b624540e7..abd79bd75019b 100644
--- a/launch/tier4_perception_launch/launch/object_recognition/detection/detection.launch.xml
+++ b/launch/tier4_perception_launch/launch/object_recognition/detection/detection.launch.xml
@@ -6,6 +6,7 @@
   <!-- Pipeline junctions -->
   <arg name="mode" description="options: `camera_lidar_radar_fusion`, `camera_lidar_fusion`, `lidar_radar_fusion`, `lidar` or `radar`"/>
   <arg name="lidar_detection_model" description="options: `transfusion`, `centerpoint`, `apollo`, `pointpainting`, `clustering`"/>
+  <arg name="bev_detection_model" default="bevdet" description="options: `bevdet`"/>
   <arg name="use_object_filter" description="use object filter"/>
   <arg name="objects_filter_method" description="options: `lanelet_filter` or `position_filter`"/>
   <arg name="use_pointcloud_map" description="use pointcloud map filter"/>
@@ -66,6 +67,7 @@
   <arg name="switch/detector/lidar_rule" default="false"/>
   <arg name="switch/detector/radar" default="false"/>
   <arg name="switch/detector/radar_only" default="false"/>
+  <arg name="switch/detector/camera_bev" default="false"/>
   <arg name="switch/detector/tracker_based" default="false"/>
   <let name="switch/detector/tracker_based" value="true" if="$(var use_detection_by_tracker)"/>
 
@@ -123,6 +125,9 @@
     <let name="switch/detector/radar_only" value="true" unless="$(var use_multi_channel_tracker_merger)"/>
     <let name="switch/detector/tracker_based" value="false"/>
   </group>
+  <group scoped="false" if="$(eval '&quot;$(var mode)&quot;==&quot;camera&quot;')">
+    <let name="switch/detector/camera_bev" value="true"/>
+  </group>
 
   <!-- Detector -->
   <group if="$(var switch/detector/camera_lidar)">
@@ -211,6 +216,30 @@
       <arg name="radar_object_clustering_param_path" value="$(var radar_object_clustering_param_path)"/>
     </include>
   </group>
+  <group if="$(var switch/detector/camera_bev)">
+    <!-- Camera-based bev detectors-->
+    <include file="$(find-pkg-share tier4_perception_launch)/launch/object_recognition/detection/detector/camera_bev_detector.launch.xml">
+      <arg name="input/pointcloud" value="$(var input/pointcloud)"/>
+      <arg name="input/camera0/image" value="$(var input/camera0/image)"/>
+      <arg name="input/camera0/info" value="$(var input/camera0/info)"/>
+      <arg name="input/camera1/image" value="$(var input/camera1/image)"/>
+      <arg name="input/camera1/info" value="$(var input/camera1/info)"/>
+      <arg name="input/camera2/image" value="$(var input/camera2/image)"/>
+      <arg name="input/camera2/info" value="$(var input/camera2/info)"/>
+      <arg name="input/camera3/image" value="$(var input/camera3/image)"/>
+      <arg name="input/camera3/info" value="$(var input/camera3/info)"/>
+      <arg name="input/camera4/image" value="$(var input/camera4/image)"/>
+      <arg name="input/camera4/info" value="$(var input/camera4/info)"/>
+      <arg name="input/camera5/image" value="$(var input/camera5/image)"/>
+      <arg name="input/camera5/info" value="$(var input/camera5/info)"/>
+      <arg name="input/camera6/image" value="$(var input/camera6/image)"/>
+      <arg name="input/camera6/info" value="$(var input/camera6/info)"/>
+      <arg name="input/camera7/image" value="$(var input/camera7/image)"/>
+      <arg name="input/camera7/info" value="$(var input/camera7/info)"/>
+      <arg name="number_of_cameras" value="$(var number_of_cameras)"/>
+      <arg name="bev_detection_model" value="$(var bev_detection_model)"/>
+    </include>
+  </group>
   <!-- DetectionByTracker -->
   <group if="$(var switch/detector/tracker_based)">
     <include file="$(find-pkg-share tier4_perception_launch)/launch/object_recognition/detection/detector/tracker_based_detector.launch.xml">
diff --git a/launch/tier4_perception_launch/launch/object_recognition/detection/detector/camera_bev_detector.launch.xml b/launch/tier4_perception_launch/launch/object_recognition/detection/detector/camera_bev_detector.launch.xml
new file mode 100644
index 0000000000000..716e205e249aa
--- /dev/null
+++ b/launch/tier4_perception_launch/launch/object_recognition/detection/detector/camera_bev_detector.launch.xml
@@ -0,0 +1,43 @@
+<?xml version="1.0"?>
+<launch>
+  <!-- Lidar parameters -->
+  <arg name="input/pointcloud"/>
+
+  <!-- Camera parameters -->
+  <arg name="input/camera0/image"/>
+  <arg name="input/camera0/info"/>
+  <arg name="input/camera1/image"/>
+  <arg name="input/camera1/info"/>
+  <arg name="input/camera2/image"/>
+  <arg name="input/camera2/info"/>
+  <arg name="input/camera3/image"/>
+  <arg name="input/camera3/info"/>
+  <arg name="input/camera4/image"/>
+  <arg name="input/camera4/info"/>
+  <arg name="input/camera5/image"/>
+  <arg name="input/camera5/info"/>
+  <arg name="input/camera6/image"/>
+  <arg name="input/camera6/info"/>
+  <arg name="input/camera7/image"/>
+  <arg name="input/camera7/info"/>
+  <arg name="number_of_cameras"/>
+  <arg name="bev_detection_model" default="bevdet" description="options: `bevdet`"/>
+
+<!-- BEVDet -->
+  <group if="$(eval &quot;'$(var bev_detection_model)'=='bevdet'&quot;)">
+    <push-ros-namespace namespace="bevdet"/>
+    <group>
+      <include file="$(find-pkg-share tensorrt_bevdet)/launch/tensorrt_bevdet.launch.xml">
+        <arg name="input/pointcloud" value="$(var input/pointcloud)"/>
+        <arg name="input/img_fl" value="$(var input/camera0/image)"/>
+        <arg name="input/img_f" value="$(var input/camera1/image)"/>
+        <arg name="input/img_fr" value="$(var input/camera2/image)"/>
+        <arg name="input/img_bl" value="$(var input/camera3/image)"/>
+        <arg name="input/img_b" value="$(var input/camera4/image)"/>
+        <arg name="input/img_br" value="$(var input/camera5/image)"/>
+        <arg name="output/pointcloud" value="points_painting"/>
+        <arg name="output/pointcboxesloud" value="boxes"/>
+      </include>
+    </group>
+  </group>
+</launch>
\ No newline at end of file
diff --git a/launch/tier4_perception_launch/launch/perception.launch.xml b/launch/tier4_perception_launch/launch/perception.launch.xml
index 6c428a5adea0b..52cb7d8291615 100644
--- a/launch/tier4_perception_launch/launch/perception.launch.xml
+++ b/launch/tier4_perception_launch/launch/perception.launch.xml
@@ -48,6 +48,7 @@
   <arg name="mode" default="camera_lidar_fusion" description="options: `camera_lidar_radar_fusion`, `camera_lidar_fusion`, `lidar_radar_fusion`, `lidar` or `radar`"/>
   <arg name="data_path" default="$(env HOME)/autoware_data" description="packages data and artifacts directory path"/>
   <arg name="lidar_detection_model" default="centerpoint" description="options: `transfusion`, `centerpoint`, `apollo`, `pointpainting`, `clustering`"/>
+  <arg name="bev_detection_model" default="bevdet" description="options: `bevdet`"/>
   <arg name="image_raw0" default="/sensing/camera/camera0/image_rect_color" description="image raw topic name"/>
   <arg name="camera_info0" default="/sensing/camera/camera0/camera_info" description="camera info topic name"/>
   <arg name="detection_rois0" default="/perception/object_recognition/detection/rois0" description="detection rois output topic name"/>
@@ -193,6 +194,7 @@
         <include file="$(find-pkg-share tier4_perception_launch)/launch/object_recognition/detection/detection.launch.xml">
           <arg name="mode" value="$(var mode)"/>
           <arg name="lidar_detection_model" value="$(var lidar_detection_model)"/>
+          <arg name="bev_detection_model" value="$(var bev_detection_model)"/>
           <arg name="use_multi_channel_tracker_merger" value="$(var use_multi_channel_tracker_merger)"/>
           <arg name="number_of_cameras" value="$(var image_number)"/>
           <arg name="input/camera0/image" value="$(var image_raw0)"/>
diff --git a/perception/tensorrt_bevdet/CMakeLists.txt b/perception/tensorrt_bevdet/CMakeLists.txt
new file mode 100644
index 0000000000000..d50811bce1898
--- /dev/null
+++ b/perception/tensorrt_bevdet/CMakeLists.txt
@@ -0,0 +1,103 @@
+cmake_minimum_required(VERSION 3.17)
+project(tensorrt_bevdet)
+
+add_compile_options(-W)
+add_compile_options(-std=c++17)
+set(CMAKE_CXX_FLAGS_RELEASE "-Wno-deprecated-declarations -O2")
+set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -std=c++17 -O0 -Xcompiler -fPIC -g -w -gencode=arch=compute_86,code=sm_86")
+find_package(tensorrt_common)
+if(NOT ${tensorrt_common_FOUND})
+  message(WARNING "The tensorrt_common package is not found. Please check its dependencies.")
+  return()
+endif()
+
+find_package(ament_cmake REQUIRED)
+find_package(cudnn_cmake_module REQUIRED)
+find_package(tensorrt_cmake_module REQUIRED)
+find_package(rclcpp REQUIRED)
+find_package(yaml-cpp REQUIRED)
+find_package(Eigen3 REQUIRED)
+find_package(OpenCV REQUIRED)
+find_package(PCL REQUIRED)
+find_package(sensor_msgs REQUIRED)
+find_package(pcl_conversions REQUIRED)
+find_package(cv_bridge REQUIRED)
+find_package(tf2_geometry_msgs REQUIRED)
+find_package(autoware_perception_msgs REQUIRED)
+
+find_package(CUDA REQUIRED)
+find_package(CUDAToolkit)
+find_package(CUDNN)
+find_package(TENSORRT)
+if(NOT (CUDAToolkit_FOUND AND CUDNN_FOUND AND TENSORRT_FOUND))
+  message(WARNING "cuda, cudnn, tensorrt libraries are not found")
+  return()
+endif()
+
+include_directories(
+  include
+  SYSTEM
+  ${YAML_CPP_INCLUDE_DIRS}
+  ${EIGEN3_INCLUDE_DIRS}
+  ${OpenCV_INCLUDE_DIRS}
+  ${PCL_INCLUDE_DIRS}
+  ${cv_bridge_INCLUDE_DIRS}
+  ${tf2_geometry_msgs_INCLUDE_DIRS}
+  ${autoware_perception_msgs_INCLUDE_DIRS}
+)
+
+cuda_add_executable(${PROJECT_NAME}_node 
+    src/bevdet_node.cpp 
+    src/bevdet.cpp
+    src/preprocess.cu 
+    src/iou3d_nms.cu
+    src/postprocess.cu
+    src/data.cpp
+    src/cpu_jpegdecoder.cpp
+    src/nvjpegdecoder.cpp
+
+    src/preprocess_plugin.cu
+    src/bevpool_plugin.cu
+    src/alignbev_plugin.cu
+    src/gatherbev_plugin.cu
+)
+
+# Link libraries to executable
+ament_target_dependencies(${PROJECT_NAME}_node
+  "rclcpp"
+  "sensor_msgs"
+  "pcl_conversions"
+  "cv_bridge"
+  "autoware_perception_msgs"
+  "tf2_geometry_msgs"
+)
+
+target_link_libraries(${PROJECT_NAME}_node 
+    yaml-cpp 
+    libnvinfer.so
+    libnvonnxparser.so
+    libz.so
+    libjpeg.so
+    rclcpp::rclcpp
+    stdc++fs
+    ${NVINFER}
+    ${TENSORRT_LIBRARIES}
+    ${CUDA_LIBRARIES}
+    ${CUBLAS_LIBRARIES}
+    ${CUDNN_LIBRARY}
+    ${OpenCV_LIBS}
+    ${PCL_LIBRARIES}
+)
+
+install(TARGETS ${PROJECT_NAME}_node
+  DESTINATION lib/${PROJECT_NAME}
+)
+
+install(DIRECTORY 
+  launch
+  config
+  data
+  DESTINATION share/${PROJECT_NAME}/
+)
+
+ament_package()
\ No newline at end of file
diff --git a/perception/tensorrt_bevdet/README.md b/perception/tensorrt_bevdet/README.md
new file mode 100644
index 0000000000000..c537de8d83a5c
--- /dev/null
+++ b/perception/tensorrt_bevdet/README.md
@@ -0,0 +1,40 @@
+# tensorrt_bevdet
+
+## Purpose
+
+tensorrt_bevdet is a dynamic 3D bev object detection package based on 6 surround view cameras.
+
+## Inner-workings / Algorithms
+
+BEVDet is a BEV perception algorithm based on panoramic cameras. It unifies multi-view images into the perspective of BEV for 3D object detection task. In this implementation, BEVDet network to inference with TensorRT.
+
+## Inputs / Outputs
+
+### Inputs
+
+| Name                 | Type                            | Description      |
+| -------------------- | ------------------------------- | ---------------- |
+| `~/input/pointcloud` | `sensor_msgs::msg::PointCloud2` | input pointcloud (only used for time alignment and display)|
+| `~/input/topic_img_fl` | `sensor_msgs::msg::Image` | input front_left camera image|
+| `~/input/topic_img_f` | `sensor_msgs::msg::Image` | input front camera image|
+| `~/input/topic_img_fr` | `sensor_msgs::msg::Image` | input front_right camera image|
+| `~/input/topic_img_bl` | `sensor_msgs::msg::Image` | input back_left camera image|
+| `~/input/topic_img_b` | `sensor_msgs::msg::Image` | input back camera image|
+| `~/input/topic_img_br` | `sensor_msgs::msg::Image` | input back_right camera image|
+
+### Outputs
+
+| Name                       | Type                                             | Description          |
+| -------------------------- | ------------------------------------------------ | -------------------- |
+| `~/output/boxes`         | `autoware_perception_msgs::msg::DetectedObjects` | detected objects     |
+| `~/output/pointcloud`     | `sensor_msgs::msg::PointCloud2`          | output pointcloud (only used for display)    |
+
+## Limittation
+
+The model is trained on open-source dataset `NuScenes` and has poor generalization on its own dataset,  If you want to use this model to infer your data, you need to retrain it.
+
+You can traine models by refer below links.
+
+[BEVDet](https://github.com/HuangJunJie2017/BEVDet/tree/dev2.1)
+
+[BEVDet export onnx](https://github.com/LCH1238/BEVDet/tree/export)
\ No newline at end of file
diff --git a/perception/tensorrt_bevdet/config/bevdet.param.yaml b/perception/tensorrt_bevdet/config/bevdet.param.yaml
new file mode 100644
index 0000000000000..1176abfda968b
--- /dev/null
+++ b/perception/tensorrt_bevdet/config/bevdet.param.yaml
@@ -0,0 +1,11 @@
+/**:
+    ros__parameters:
+      data_params:
+        N: 6  # camera num
+        H: 900 # image height
+        W: 1600 # image width
+        cams: ["CAM_FRONT_LEFT", "CAM_FRONT", "CAM_FRONT_RIGHT", "CAM_BACK_LEFT", "CAM_BACK", "CAM_BACK_RIGHT"]
+      post_process_params:
+        # post-process params
+        score_thre: 0.2
+        class_names: ["car", "truck", "construction_vehicle", "bus", "trailer", "barrier", "motorcycle", "bicycle", "pedestrian", "traffic_cone"]
\ No newline at end of file
diff --git a/perception/tensorrt_bevdet/config/bevdet_r50_4dlongterm_depth.yaml b/perception/tensorrt_bevdet/config/bevdet_r50_4dlongterm_depth.yaml
new file mode 100644
index 0000000000000..074e1a1664afb
--- /dev/null
+++ b/perception/tensorrt_bevdet/config/bevdet_r50_4dlongterm_depth.yaml
@@ -0,0 +1,51 @@
+bev_range: [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]
+mean: [123.675, 116.28, 103.53]
+std: [58.395, 57.12, 57.375]
+use_depth: true 
+use_adj: true
+adj_num: 8
+transform_size: 6
+cam_params_size: 27
+sampling: nearest # nearest # or bicubic 
+data_config:
+    Ncams: 6
+    cams: [CAM_FRONT_LEFT, CAM_FRONT, CAM_FRONT_RIGHT, CAM_BACK_LEFT, CAM_BACK, CAM_BACK_RIGHT]
+    crop: [140, 0]
+    flip: true
+    input_size: [256, 704]
+    resize: [-0.06, 0.11]
+    resize_test: 0.0
+    rot: [-5.4, 5.4]
+    src_size: [900, 1600]
+grid_config:
+    depth: [1.0, 60.0, 0.5]
+    x: [-51.2, 51.2, 0.8]
+    y: [-51.2, 51.2, 0.8]
+    z: [-5, 3, 8]
+model:
+    bevpool_channels: 80
+    coder:
+        code_size: 9
+        max_num: 500
+        post_center_range: [-61.2, -61.2, -10.0, 61.2, 61.2, 10.0]
+        score_threshold: 0.1
+    common_head:
+        channels: [2, 1, 3, 2, 2]
+        names: [reg, height, dim, rot, vel]
+    down_sample: 16
+    tasks:
+    -   class_names: [car, truck, construction_vehicle, bus, trailer, barrier, motorcycle,
+            bicycle, pedestrian, traffic_cone]
+        num_class: 10
+test_cfg:
+    max_per_img: 500
+    max_pool_nms: false
+    min_radius: [4, 12, 10, 1, 0.85, 0.175]
+    nms_rescale_factor:
+    -   [1.0, 0.7, 0.7, 0.4, 0.55, 1.1, 1.0, 1.0, 1.5, 3.5]
+    nms_thr: [0.2]
+    nms_type: [rotate]
+    post_center_limit_range: [-61.2, -61.2, -10.0, 61.2, 61.2, 10.0]
+    post_max_size: 500
+    pre_max_size: 1000
+    score_threshold: 0.1
diff --git a/perception/tensorrt_bevdet/config/cams_param.yaml b/perception/tensorrt_bevdet/config/cams_param.yaml
new file mode 100644
index 0000000000000..8a58de2a8b651
--- /dev/null
+++ b/perception/tensorrt_bevdet/config/cams_param.yaml
@@ -0,0 +1,99 @@
+cams:
+    CAM_BACK:
+        cam_intrinsic:
+        -   [809.2209905677063, 0.0, 829.2196003259838]
+        -   [0.0, 809.2209905677063, 481.77842384512485]
+        -   [0.0, 0.0, 1.0]
+        data_path: 
+            ./data/nuscenes/samples/CAM_BACK/n015-2018-08-02-17-16-37+0800__CAM_BACK__1533201470437525.jpg
+        sensor2ego_rotation: [0.5037872666382278, -0.49740249788611096, -0.4941850223835201,
+            0.5045496097725578]
+        sensor2ego_translation: [0.0283260309358, 0.00345136761476, 1.57910346144]
+        sensor2lidar_rotation:
+        -   [-0.9999413173368413, 0.009846645553456103, -0.004517239644395279]
+        -   [0.004591280151619832, 0.0075097729192299114, -0.9999612609782784]
+        -   [-0.009812340660088988, -0.9999233205011492, -0.007554540934044881]
+        sensor2lidar_translation: [-0.0036954598058400734, -0.9075747504218441, -0.283221870904093]
+    CAM_BACK_LEFT:
+        cam_intrinsic:
+        -   [1256.7414812095406, 0.0, 792.1125740759628]
+        -   [0.0, 1256.7414812095406, 492.7757465151356]
+        -   [0.0, 0.0, 1.0]
+        data_path: 
+            ./data/nuscenes/samples/CAM_BACK_LEFT/n015-2018-08-02-17-16-37+0800__CAM_BACK_LEFT__1533201470447423.jpg
+        sensor2ego_rotation: [0.6924185592174665, -0.7031619420114925, -0.11648342771943819,
+            0.11203317912370753]
+        sensor2ego_translation: [1.03569100218, 0.484795032713, 1.59097014818]
+        sensor2lidar_rotation:
+        -   [-0.3170637040675469, 0.019895609309549686, -0.9481955348413992]
+        -   [0.9480779188624351, 0.03287371923632269, -0.3163345987226865]
+        -   [0.024877044206231273, -0.9992614689428244, -0.029285651056243037]
+        sensor2lidar_translation: [-0.48313021193087025, 0.09925074956760227, -0.24976867779076528]
+    CAM_BACK_RIGHT:
+        cam_intrinsic:
+        -   [1259.5137405846733, 0.0, 807.2529053838625]
+        -   [0.0, 1259.5137405846733, 501.19579884916527]
+        -   [0.0, 0.0, 1.0]
+        data_path: 
+            ./data/nuscenes/samples/CAM_BACK_RIGHT/n015-2018-08-02-17-16-37+0800__CAM_BACK_RIGHT__1533201470427893.jpg
+        sensor2ego_rotation: [0.12280980120078765, -0.132400842670559, -0.7004305821388234,
+            0.690496031265798]
+        sensor2ego_translation: [1.0148780988, -0.480568219723, 1.56239545128]
+        sensor2lidar_rotation:
+        -   [-0.3567246414755177, -0.005413898071006906, 0.934193887729864]
+        -   [-0.9335307099588551, 0.04018099149456167, -0.3562385457614421]
+        -   [-0.035608197481429044, -0.9991777507681946, -0.01938758989490032]
+        sensor2lidar_translation: [0.482312676691663, 0.0791837770804591, -0.27307179836459383]
+    CAM_FRONT:
+        cam_intrinsic:
+        -   [1266.417203046554, 0.0, 816.2670197447984]
+        -   [0.0, 1266.417203046554, 491.50706579294757]
+        -   [0.0, 0.0, 1.0]
+        data_path: 
+            ./data/nuscenes/samples/CAM_FRONT/n015-2018-08-02-17-16-37+0800__CAM_FRONT__1533201470412460.jpg
+        sensor2ego_rotation: [0.4998015430569128, -0.5030316162024876, 0.4997798114386805,
+            -0.49737083824542755]
+        sensor2ego_translation: [1.70079118954, 0.0159456324149, 1.51095763913]
+        sensor2lidar_rotation:
+        -   [0.9999693728739966, 0.006755601474249302, -0.003951602548949129]
+        -   [0.003824557675651031, 0.0187164545279605, 0.9998175168942008]
+        -   [0.006828328680530752, -0.9998020084889915, 0.018690044109277416]
+        sensor2lidar_translation: [-0.012715806721303125, 0.7688055822719093, -0.31059455973338856]
+    CAM_FRONT_LEFT:
+        cam_intrinsic:
+        -   [1272.5979470598488, 0.0, 826.6154927353808]
+        -   [0.0, 1272.5979470598488, 479.75165386361925]
+        -   [0.0, 0.0, 1.0]
+        data_path: 
+            ./data/nuscenes/samples/CAM_FRONT_LEFT/n015-2018-08-02-17-16-37+0800__CAM_FRONT_LEFT__1533201470404874.jpg
+        sensor2ego_rotation: [0.6757265034669446, -0.6736266522251881, 0.21214015046209478,
+            -0.21122827103904068]
+        sensor2ego_translation: [1.52387798135, 0.494631336551, 1.50932822144]
+        sensor2lidar_rotation:
+        -   [0.5726028043564848, 0.0027092489875872503, -0.8198284505998853]
+        -   [0.8195566437415105, 0.024067561593338945, 0.5724924979229841]
+        -   [0.021282296451183572, -0.9997066632011964, 0.011560770255041255]
+        sensor2lidar_translation: [-0.4917212028847189, 0.5936531098472528, -0.31925386662383204]
+    CAM_FRONT_RIGHT:
+        cam_intrinsic:
+        -   [1260.8474446004698, 0.0, 807.968244525554]
+        -   [0.0, 1260.8474446004698, 495.3344268742088]
+        -   [0.0, 0.0, 1.0]
+        data_path: 
+            ./data/nuscenes/samples/CAM_FRONT_RIGHT/n015-2018-08-02-17-16-37+0800__CAM_FRONT_RIGHT__1533201470420339.jpg
+        sensor2ego_rotation: [0.2060347966337182, -0.2026940577919598, 0.6824507824531167,
+            -0.6713610884174485]
+        sensor2ego_translation: [1.5508477543, -0.493404796419, 1.49574800619]
+        sensor2lidar_rotation:
+        -   [0.5518728006969844, -0.010452325915577761, 0.8338627949092215]
+        -   [-0.8335213795734647, 0.024319671348433384, 0.5519516857293306]
+        -   [-0.0260484480307739, -0.9996495898405882, 0.004709128022186309]
+        sensor2lidar_translation: [0.49650027090206095, 0.6174621492795893, -0.32655958795607276]
+ego2global_rotation: [0.9984303573176436, -0.008635865272570774, 0.0025833156025800875,
+    -0.05527720957189669]
+ego2global_translation: [249.89610931430778, 917.5522573162784, 0.0]
+lidar2ego_rotation: [0.7077955119163518, -0.006492242056004365, 0.010646214713995808,
+    -0.7063073142877817]
+lidar2ego_translation: [0.943713, 0.0, 1.84023]
+scene_token: e7ef871f77f44331aefdebc24ec034b7
+timestamp: 1533201470448696
diff --git a/perception/tensorrt_bevdet/include/alignbev_plugin.hpp b/perception/tensorrt_bevdet/include/alignbev_plugin.hpp
new file mode 100644
index 0000000000000..ccdd0e15546d9
--- /dev/null
+++ b/perception/tensorrt_bevdet/include/alignbev_plugin.hpp
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <NvInfer.h>
+#include <cmath>
+#include <string>
+#include <vector>
+#include <unistd.h>
+#include <iostream>
+
+namespace
+{
+static const char *ALIGN_PLUGIN_NAME {"AlignBEV"};
+static const char *ALIGN_PLUGIN_VERSION {"1"};
+} // namespace
+
+namespace nvinfer1
+{
+class AlignBEVPlugin : public IPluginV2DynamicExt
+{
+private:
+    const std::string name_;
+    std::string       namespace_;
+    struct
+    {
+    } m_;
+
+public:
+    AlignBEVPlugin() = delete;
+    AlignBEVPlugin(const std::string &name);
+    AlignBEVPlugin(const std::string &name, const void *buffer, size_t length);
+    ~AlignBEVPlugin();
+
+    // Method inherited from IPluginV2
+    const char *getPluginType() const noexcept override;
+    const char *getPluginVersion() const noexcept override;
+    int32_t     getNbOutputs() const noexcept override;
+    int32_t     initialize() noexcept override;
+    void        terminate() noexcept override;
+    size_t      getSerializationSize() const noexcept override;
+    void        serialize(void *buffer) const noexcept override;
+    void        destroy() noexcept override;
+    void        setPluginNamespace(const char *pluginNamespace) noexcept override;
+    const char *getPluginNamespace() const noexcept override;
+
+    // Method inherited from IPluginV2Ext
+    DataType getOutputDataType(int32_t index, DataType const *inputTypes, int32_t nbInputs) const noexcept override;
+    void     attachToContext(cudnnContext *contextCudnn, cublasContext *contextCublas, IGpuAllocator *gpuAllocator) noexcept override;
+    void     detachFromContext() noexcept override;
+
+    // Method inherited from IPluginV2DynamicExt
+    IPluginV2DynamicExt *clone() const noexcept override;
+    DimsExprs            getOutputDimensions(int32_t outputIndex, const DimsExprs *inputs, int32_t nbInputs, IExprBuilder &exprBuilder) noexcept override;
+    bool                 supportsFormatCombination(int32_t pos, const PluginTensorDesc *inOut, int32_t nbInputs, int32_t nbOutputs) noexcept override;
+    void                 configurePlugin(const DynamicPluginTensorDesc *in, int32_t nbInputs, const DynamicPluginTensorDesc *out, int32_t nbOutputs) noexcept override;
+    size_t               getWorkspaceSize(const PluginTensorDesc *inputs, int32_t nbInputs, const PluginTensorDesc *outputs, int32_t nbOutputs) const noexcept override;
+    int32_t              enqueue(const PluginTensorDesc *inputDesc, const PluginTensorDesc *outputDesc, const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept override;
+
+protected:
+    // To prevent compiler warnings
+    using nvinfer1::IPluginV2::enqueue;
+    using nvinfer1::IPluginV2::getOutputDimensions;
+    using nvinfer1::IPluginV2::getWorkspaceSize;
+    using nvinfer1::IPluginV2Ext::configurePlugin;
+};
+
+class AlignBEVPluginCreator : public IPluginCreator
+{
+private:
+    static PluginFieldCollection    fc_;
+    static std::vector<PluginField> attr_;
+    std::string                     namespace_;
+
+public:
+    AlignBEVPluginCreator();
+    ~AlignBEVPluginCreator();
+    const char *                 getPluginName() const noexcept override;
+    const char *                 getPluginVersion() const noexcept override;
+    const PluginFieldCollection *getFieldNames() noexcept override;
+    IPluginV2DynamicExt *        createPlugin(const char *name, const PluginFieldCollection *fc) noexcept override;
+    IPluginV2DynamicExt *        deserializePlugin(const char *name, const void *serialData, size_t serialLength) noexcept override;
+    void                         setPluginNamespace(const char *pluginNamespace) noexcept override;
+    const char *                 getPluginNamespace() const noexcept override;
+};
+
+} // namespace nvinfer1
diff --git a/perception/tensorrt_bevdet/include/bevdet.hpp b/perception/tensorrt_bevdet/include/bevdet.hpp
new file mode 100644
index 0000000000000..1ad04b1c4704f
--- /dev/null
+++ b/perception/tensorrt_bevdet/include/bevdet.hpp
@@ -0,0 +1,239 @@
+#ifndef __BEVDET_HPP__
+#define __BEVDET_HPP__
+
+#include <string>
+#include <vector>
+#include <map>
+#include <memory>
+#include <yaml-cpp/yaml.h>
+
+#include <Eigen/Core>
+#include <Eigen/Geometry>
+
+
+#include "common.hpp"
+#include "postprocess.hpp"
+#include "data.hpp"
+
+#include "NvInfer.h"
+#include <NvOnnxParser.h>
+
+
+class adjFrame{
+public:
+    adjFrame(){}
+    adjFrame(int _n,
+             size_t _buf_size) : 
+             n(_n), 
+             buf_size(_buf_size),
+             scenes_token(_n),
+             ego2global_rot(_n),
+             ego2global_trans(_n) {
+        CHECK_CUDA(cudaMalloc((void**)&adj_buffer, _n * _buf_size));
+        CHECK_CUDA(cudaMemset(adj_buffer, 0, _n * _buf_size));
+        last = 0;
+        buffer_num = 0;
+        init = false;
+
+        for(auto &rot : ego2global_rot){
+            rot = Eigen::Quaternion<float>(0.f, 0.f, 0.f, 0.f);
+        }
+        for(auto &trans : ego2global_trans){
+            trans = Eigen::Translation3f(0.f, 0.f, 0.f);
+        }
+
+    }
+    const std::string& lastScenesToken() const{
+        return scenes_token[last];
+    }
+
+    void reset(){
+        last = 0; // origin -1
+        buffer_num = 0;
+        init = false;
+    }
+
+    void saveFrameBuffer(const void* curr_buffer, 
+                        const std::string &curr_token, 
+                        const Eigen::Quaternion<float> &_ego2global_rot,
+                        const Eigen::Translation3f &_ego2global_trans){
+        int iters = init ? 1 : n;
+        while(iters--){
+          last = (last + 1) % n;
+          CHECK_CUDA(cudaMemcpy((char*)adj_buffer + last * buf_size, curr_buffer,
+                          buf_size, cudaMemcpyDeviceToDevice));
+          scenes_token[last] = curr_token;
+          ego2global_rot[last] = _ego2global_rot;
+          ego2global_trans[last] = _ego2global_trans;
+          buffer_num = std::min(buffer_num + 1, n);
+        }
+        init = true;
+    }
+    int havingBuffer(int idx){
+        return static_cast<int>(idx < buffer_num);
+    }
+
+    const void* getFrameBuffer(int idx){
+        idx = (-idx + last + n) % n;
+        return (char*)adj_buffer + idx * buf_size;
+    }
+    void getEgo2Global(int idx, 
+                    Eigen::Quaternion<float> &adj_ego2global_rot, 
+                    Eigen::Translation3f &adj_ego2global_trans){
+        idx = (-idx + last + n) % n;
+        adj_ego2global_rot = ego2global_rot[idx];
+        adj_ego2global_trans = ego2global_trans[idx];
+    }
+
+    ~adjFrame(){
+        CHECK_CUDA(cudaFree(adj_buffer));
+    }
+
+private:
+    int n;
+    size_t buf_size;
+
+    int last;
+    int buffer_num;
+    bool init;
+
+    std::vector<std::string> scenes_token;
+    std::vector<Eigen::Quaternion<float>> ego2global_rot;
+    std::vector<Eigen::Translation3f> ego2global_trans;
+
+    void* adj_buffer;
+};
+
+class BEVDet{
+public:
+    BEVDet(){}
+    BEVDet(const std::string &config_file, int n_img,      
+                                    std::vector<Eigen::Matrix3f> _cams_intrin, 
+                                    std::vector<Eigen::Quaternion<float>> _cams2ego_rot, 
+                                    std::vector<Eigen::Translation3f> _cams2ego_trans,
+                                    const std::string &onnx_file,
+                                    const std::string &engine_file);
+  
+    int DoInfer(const camsData &cam_data,  std::vector<Box> &out_detections, 
+                                                            float &cost_time, int idx=-1);
+    ~BEVDet();
+
+protected:
+    void InitParams(const std::string &config_file);
+
+    void InitViewTransformer(std::shared_ptr<int> &ranks_bev_ptr, 
+                             std::shared_ptr<int> &ranks_depth_ptr, 
+                             std::shared_ptr<int> &ranks_feat_ptr, 
+                             std::shared_ptr<int> &interval_starts_ptr, 
+                             std::shared_ptr<int> &interval_lengths_ptr);
+    void ExportEngine(const std::string &onnxFile, const std::string &trtFile);
+    int InitEngine(const std::string &engine_file);
+
+    int DeserializeTRTEngine(const std::string &engine_file, 
+                             nvinfer1::ICudaEngine **engine_ptr);
+
+    void MallocDeviceMemory();
+
+    void InitCamParams(const std::vector<Eigen::Quaternion<float>> &curr_cams2ego_rot,
+                       const std::vector<Eigen::Translation3f> &curr_cams2ego_trans,
+                       const std::vector<Eigen::Matrix3f> &cams_intrin);
+
+    void GetAdjBEVFeature(const std::string &curr_scene_token, 
+                        const Eigen::Quaternion<float> &ego2global_rot,
+                        const Eigen::Translation3f &ego2global_trans);
+
+    void GetCurr2AdjTransform(const Eigen::Quaternion<float> &curr_ego2global_rot,
+                            const Eigen::Quaternion<float> &adj_ego2global_rot,
+                            const Eigen::Translation3f &curr_ego2global_trans,
+                            const Eigen::Translation3f &adj_ego2global_trans,
+                            float* transform_dev);
+
+
+
+private:
+
+    int N_img;
+
+    int src_img_h;
+    int src_img_w;
+    int input_img_h;
+    int input_img_w;
+    int crop_h;
+    int crop_w;
+    float resize_radio;
+    int down_sample;
+    int feat_h;
+    int feat_w;
+    int bev_h;
+    int bev_w;
+    int bevpool_channel;
+
+    float depth_start;
+    float depth_end;
+    float depth_step;
+    int depth_num;
+
+    float x_start;
+    float x_end;
+    float x_step;
+    int xgrid_num;
+
+    float y_start;
+    float y_end;
+    float y_step;
+    int ygrid_num;
+
+    float z_start;
+    float z_end;
+    float z_step;
+    int zgrid_num;
+
+    std::vector<float> mean;
+    std::vector<float> std;
+
+    bool use_depth;
+    bool use_adj;
+    int adj_num;
+
+
+    int class_num;
+    float score_thresh;
+    float nms_overlap_thresh;
+    int nms_pre_maxnum;
+    int nms_post_maxnum;
+    std::vector<float> nms_rescale_factor;
+    std::vector<int> class_num_pre_task;
+    std::map<std::string, int> out_num_task_head;
+
+    std::vector<Eigen::Matrix3f> cams_intrin;
+    std::vector<Eigen::Quaternion<float>> cams2ego_rot;
+    std::vector<Eigen::Translation3f> cams2ego_trans;
+
+    Eigen::Matrix3f post_rot;
+    Eigen::Translation3f post_trans;
+
+    std::vector<size_t> trt_buffer_sizes;
+    void** trt_buffer_dev;
+    float* cam_params_host;
+    void** post_buffer;
+
+    std::map<std::string, int> buffer_map;
+
+    int valid_feat_num;
+    int unique_bev_num;
+
+    int transform_size;
+    int cam_params_size;
+
+    Logger g_logger;
+
+    nvinfer1::ICudaEngine* trt_engine;
+    nvinfer1::IExecutionContext* trt_context;
+
+    std::unique_ptr<PostprocessGPU> postprocess_ptr;
+    std::unique_ptr<adjFrame> adj_frame_ptr;
+
+};
+
+
+#endif
\ No newline at end of file
diff --git a/perception/tensorrt_bevdet/include/bevdet_node.hpp b/perception/tensorrt_bevdet/include/bevdet_node.hpp
new file mode 100644
index 0000000000000..832ac87d382b5
--- /dev/null
+++ b/perception/tensorrt_bevdet/include/bevdet_node.hpp
@@ -0,0 +1,140 @@
+#include <cstdio>
+#include <string>
+#include <iostream>
+#include <fstream>
+#include <thread>
+#include <chrono>
+#include <cassert>
+#include <map>
+
+#include <yaml-cpp/yaml.h>
+#include "bevdet.hpp"
+#include "cpu_jpegdecoder.hpp"
+
+#include <pcl/io/pcd_io.h>
+#include <pcl/point_types.h>
+#include <pcl/common/transforms.h>
+
+
+
+#include <opencv2/opencv.hpp>
+#include <cv_bridge/cv_bridge.h>
+#include <sensor_msgs/msg/image.hpp>
+
+#include <sensor_msgs/msg/point_cloud2.hpp>
+#include <pcl_conversions/pcl_conversions.h>  // msg2pcl
+
+#include <geometry_msgs/msg/point32.hpp>
+#include <geometry_msgs/msg/vector3.hpp>
+#include <geometry_msgs/msg/quaternion.hpp>
+#include <tf2_geometry_msgs/tf2_geometry_msgs.hpp>
+
+#include <autoware_perception_msgs/msg/detected_object_kinematics.hpp>
+#include <autoware_perception_msgs/msg/detected_objects.hpp>
+#include <autoware_perception_msgs/msg/object_classification.hpp>
+#include <autoware_perception_msgs/msg/shape.hpp>
+
+#include <message_filters/subscriber.h>
+#include <message_filters/synchronizer.h>
+#include <message_filters/sync_policies/approximate_time.h>
+
+
+using std::chrono::duration;
+using std::chrono::high_resolution_clock;
+
+typedef pcl::PointXYZI PointT;
+
+uint8_t getSemanticType(const std::string & class_name);
+void Getinfo(void);
+
+void box3DToDetectedObjects(const std::vector<Box>& boxes, 
+                    autoware_perception_msgs::msg::DetectedObjects & objects,
+                    const std::vector<std::string> & class_names,  
+                    float score_thre,
+                    const bool has_twist);
+
+
+// opencv Mat-> std::vector<char>
+int cvToArr(cv::Mat img, std::vector<char> &raw_data)
+{
+    if (img.empty())
+    {
+        std::cerr << "image is empty. " << std::endl;
+        return EXIT_FAILURE;
+    }
+    
+    std::vector<u_char> raw_data_;
+    cv::imencode(".jpg", img, raw_data_);
+    raw_data = std::vector<char>(raw_data_.begin(), raw_data_.end());
+    return EXIT_SUCCESS;
+}
+
+int cvImgToArr(std::vector<cv::Mat> &imgs, std::vector<std::vector<char>> &imgs_data)
+{
+    imgs_data.resize(imgs.size());
+
+    for(size_t i = 0; i < imgs_data.size(); i++)
+    {   
+        if(cvToArr(imgs[i], imgs_data[i]))
+        {
+            return EXIT_FAILURE;
+        }
+    }
+    return EXIT_SUCCESS;
+}
+ 
+class TRTBEVDetNode : public rclcpp::Node
+{
+private:
+
+    size_t img_N_;
+    int img_w_; 
+    int img_h_;
+    
+    std::string model_config_;
+    
+    std::string onnx_file_;
+    std::string engine_file_;
+   
+    YAML::Node camconfig_; 
+    
+    std::vector<std::string> imgs_name_;
+    std::vector<std::string> class_names_;
+
+    camsData sampleData_;
+    std::shared_ptr<BEVDet> bevdet_;
+
+    uchar* imgs_dev_ = nullptr; 
+    float score_thre_;
+
+    rclcpp::Publisher<sensor_msgs::msg::PointCloud2>::SharedPtr pub_cloud_;
+    rclcpp::Publisher<autoware_perception_msgs::msg::DetectedObjects>::SharedPtr pub_boxes_;//ros2无该消息类型
+    
+    message_filters::Subscriber<sensor_msgs::msg::PointCloud2> sub_cloud_; 
+    message_filters::Subscriber<sensor_msgs::msg::Image> sub_f_img_; 
+    message_filters::Subscriber<sensor_msgs::msg::Image> sub_fl_img_; 
+    message_filters::Subscriber<sensor_msgs::msg::Image> sub_fr_img_; 
+    message_filters::Subscriber<sensor_msgs::msg::Image> sub_b_img_; 
+    message_filters::Subscriber<sensor_msgs::msg::Image> sub_bl_img_; 
+    message_filters::Subscriber<sensor_msgs::msg::Image> sub_br_img_; 
+
+    typedef message_filters::sync_policies::ApproximateTime<
+        sensor_msgs::msg::PointCloud2, 
+        sensor_msgs::msg::Image, sensor_msgs::msg::Image, sensor_msgs::msg::Image,
+        sensor_msgs::msg::Image, sensor_msgs::msg::Image, sensor_msgs::msg::Image> MySyncPolicy;
+    
+    typedef message_filters::Synchronizer<MySyncPolicy> Sync;
+    std::shared_ptr<Sync> sync_;
+
+public:
+    TRTBEVDetNode(const std::string & node_name, const rclcpp::NodeOptions & options);
+    ~TRTBEVDetNode();
+
+    void callback(const sensor_msgs::msg::PointCloud2::ConstSharedPtr & msg_cloud, 
+    const sensor_msgs::msg::Image::ConstSharedPtr & msg_fl_img,
+    const sensor_msgs::msg::Image::ConstSharedPtr & msg_f_img,
+    const sensor_msgs::msg::Image::ConstSharedPtr & msg_fr_img,
+    const sensor_msgs::msg::Image::ConstSharedPtr & msg_bl_img,
+    const sensor_msgs::msg::Image::ConstSharedPtr & msg_b_img,
+    const sensor_msgs::msg::Image::ConstSharedPtr & msg_br_img);
+};
\ No newline at end of file
diff --git a/perception/tensorrt_bevdet/include/bevpool_plugin.hpp b/perception/tensorrt_bevdet/include/bevpool_plugin.hpp
new file mode 100644
index 0000000000000..cab0b58d60c98
--- /dev/null
+++ b/perception/tensorrt_bevdet/include/bevpool_plugin.hpp
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <NvInfer.h>
+#include <cmath>
+#include <map>
+#include <string>
+#include <unistd.h>
+#include <vector>
+#include <iostream>
+
+namespace
+{
+static const char *BEVPOOL_PLUGIN_NAME {"BEVPool"};
+static const char *BEVPOOL_PLUGIN_VERSION {"1"};
+} // namespace
+
+namespace nvinfer1
+{
+class BEVPoolPlugin : public IPluginV2DynamicExt
+{
+private:
+    const std::string name_;
+    std::string       namespace_;
+    struct
+    {
+        int bev_h;
+        int bev_w;
+        int n;
+    } m_;
+
+public:
+    BEVPoolPlugin() = delete;
+    BEVPoolPlugin(const std::string &name, int bev_h, int bev_w, int n);
+    BEVPoolPlugin(const std::string &name, const void *buffer, size_t length);
+    ~BEVPoolPlugin();
+
+    // Method inherited from IPluginV2
+    const char *getPluginType() const noexcept override;
+    const char *getPluginVersion() const noexcept override;
+    int32_t     getNbOutputs() const noexcept override;
+    int32_t     initialize() noexcept override;
+    void        terminate() noexcept override;
+    size_t      getSerializationSize() const noexcept override;
+    void        serialize(void *buffer) const noexcept override;
+    void        destroy() noexcept override;
+    void        setPluginNamespace(const char *pluginNamespace) noexcept override;
+    const char *getPluginNamespace() const noexcept override;
+
+    // Method inherited from IPluginV2Ext
+    DataType getOutputDataType(int32_t index, DataType const *inputTypes, int32_t nbInputs) const noexcept override;
+    void     attachToContext(cudnnContext *contextCudnn, cublasContext *contextCublas, IGpuAllocator *gpuAllocator) noexcept override;
+    void     detachFromContext() noexcept override;
+
+    // Method inherited from IPluginV2DynamicExt
+    IPluginV2DynamicExt *clone() const noexcept override;
+    DimsExprs            getOutputDimensions(int32_t outputIndex, const DimsExprs *inputs, int32_t nbInputs, IExprBuilder &exprBuilder) noexcept override;
+    bool                 supportsFormatCombination(int32_t pos, const PluginTensorDesc *inOut, int32_t nbInputs, int32_t nbOutputs) noexcept override;
+    void                 configurePlugin(const DynamicPluginTensorDesc *in, int32_t nbInputs, const DynamicPluginTensorDesc *out, int32_t nbOutputs) noexcept override;
+    size_t               getWorkspaceSize(const PluginTensorDesc *inputs, int32_t nbInputs, const PluginTensorDesc *outputs, int32_t nbOutputs) const noexcept override;
+    int32_t              enqueue(const PluginTensorDesc *inputDesc, const PluginTensorDesc *outputDesc, const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept override;
+
+protected:
+    // To prevent compiler warnings
+    using nvinfer1::IPluginV2::enqueue;
+    using nvinfer1::IPluginV2::getOutputDimensions;
+    using nvinfer1::IPluginV2::getWorkspaceSize;
+    using nvinfer1::IPluginV2Ext::configurePlugin;
+};
+
+class BEVPoolPluginCreator : public IPluginCreator
+{
+private:
+    static PluginFieldCollection    fc_;
+    static std::vector<PluginField> attr_;
+    std::string                     namespace_;
+
+public:
+    BEVPoolPluginCreator();
+    ~BEVPoolPluginCreator();
+    const char *                 getPluginName() const noexcept override;
+    const char *                 getPluginVersion() const noexcept override;
+    const PluginFieldCollection *getFieldNames() noexcept override;
+    IPluginV2DynamicExt *        createPlugin(const char *name, const PluginFieldCollection *fc) noexcept override;
+    IPluginV2DynamicExt *        deserializePlugin(const char *name, const void *serialData, size_t serialLength) noexcept override;
+    void                         setPluginNamespace(const char *pluginNamespace) noexcept override;
+    const char *                 getPluginNamespace() const noexcept override;
+};
+
+} // namespace nvinfer1
diff --git a/perception/tensorrt_bevdet/include/common.hpp b/perception/tensorrt_bevdet/include/common.hpp
new file mode 100644
index 0000000000000..e49a75804eb9a
--- /dev/null
+++ b/perception/tensorrt_bevdet/include/common.hpp
@@ -0,0 +1,135 @@
+#ifndef __COMMON_HPP__
+#define __COMMON_HPP__
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string>
+#include <iostream>
+
+#include <cuda_runtime.h>
+#include <NvInfer.h>
+
+
+typedef unsigned char uchar;
+
+#define NUM_THREADS 512 
+
+#define CHECK_CUDA(ans) { GPUAssert((ans), __FILE__, __LINE__); }
+
+#define DIVUP(m, n) (((m) + (n)-1) / (n))
+
+inline void GPUAssert(cudaError_t code, const char *file, int line, bool abort = true) {
+  if (code != cudaSuccess) {
+    fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
+    if (abort) exit(code);
+  }
+};
+
+#define CUDA_1D_KERNEL_LOOP(i, n) \
+  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); i += blockDim.x * gridDim.x)
+
+const int MAXTENSORDIMS = 6;
+struct TensorDesc {
+  int shape[MAXTENSORDIMS];
+  int stride[MAXTENSORDIMS];
+  int dim;
+};
+
+inline int GET_BLOCKS(const int N) {
+  int optimal_block_num = DIVUP(N, NUM_THREADS);
+  int max_block_num = 4096;
+  return optimal_block_num < max_block_num ? optimal_block_num : max_block_num;
+}
+
+
+__inline__ std::string dataTypeToString(nvinfer1::DataType dataType)
+{
+    switch (dataType)
+    {
+    case nvinfer1::DataType::kFLOAT:
+        return std::string("FP32 ");
+    case nvinfer1::DataType::kHALF:
+        return std::string("FP16 ");
+    case nvinfer1::DataType::kINT8:
+        return std::string("INT8 ");
+    case nvinfer1::DataType::kINT32:
+        return std::string("INT32");
+    case nvinfer1::DataType::kBOOL:
+        return std::string("BOOL ");
+    default:
+        return std::string("Unknown");
+    }
+}
+
+
+__inline__ size_t dataTypeToSize(nvinfer1::DataType dataType)
+{
+    switch ((int)dataType)
+    {
+    case int(nvinfer1::DataType::kFLOAT):
+        return 4;
+    case int(nvinfer1::DataType::kHALF):
+        return 2;
+    case int(nvinfer1::DataType::kINT8):
+        return 1;
+    case int(nvinfer1::DataType::kINT32):
+        return 4;
+    case int(nvinfer1::DataType::kBOOL):
+        return 1;
+    default:
+        return 4;
+    }
+}
+
+__inline__ std::string shapeToString(nvinfer1::Dims32 dim)
+{
+    std::string output("(");
+    if (dim.nbDims == 0)
+    {
+        return output + std::string(")");
+    }
+    for (int i = 0; i < dim.nbDims - 1; ++i)
+    {
+        output += std::to_string(dim.d[i]) + std::string(", ");
+    }
+    output += std::to_string(dim.d[dim.nbDims - 1]) + std::string(")");
+    return output;
+}
+
+
+
+
+
+class Logger : public nvinfer1::ILogger {
+ public:
+  explicit Logger(Severity severity = Severity::kWARNING) : reportable_severity(severity){}
+
+  void log(Severity severity, const char *msg) noexcept override {
+    // suppress messages with severity enum value greater than the reportable
+    if (severity > reportable_severity) return;
+    switch (severity) {
+      case Severity::kINTERNAL_ERROR:
+        std::cerr << "INTERNAL_ERROR: ";
+        break;
+      case Severity::kERROR:
+        std::cerr << "ERROR: ";
+        break;
+      case Severity::kWARNING:
+        std::cerr << "WARNING: ";
+        break;
+      case Severity::kINFO:
+        std::cerr << "INFO: ";
+        break;
+      default:
+        std::cerr << "UNKNOWN: ";
+        break;
+    }
+    std::cerr << msg << std::endl;
+  }
+
+  Severity reportable_severity;
+};
+
+
+#endif
\ No newline at end of file
diff --git a/perception/tensorrt_bevdet/include/cpu_jpegdecoder.hpp b/perception/tensorrt_bevdet/include/cpu_jpegdecoder.hpp
new file mode 100644
index 0000000000000..e8f5bc966e15e
--- /dev/null
+++ b/perception/tensorrt_bevdet/include/cpu_jpegdecoder.hpp
@@ -0,0 +1,9 @@
+#ifndef __CPU_JPEGDECODER__
+#define __CPU_JPEGDECODER__
+
+#include <vector>
+#include "common.hpp"
+
+int decode_cpu(const std::vector<std::vector<char>> &files_data, uchar* out_imgs, size_t width, size_t height);
+
+#endif
\ No newline at end of file
diff --git a/perception/tensorrt_bevdet/include/data.hpp b/perception/tensorrt_bevdet/include/data.hpp
new file mode 100644
index 0000000000000..d27716b2e7e72
--- /dev/null
+++ b/perception/tensorrt_bevdet/include/data.hpp
@@ -0,0 +1,118 @@
+#ifndef __DATA_HPP__
+#define __DATA_HPP__
+
+#include <vector>
+
+#include <Eigen/Core>
+#include <Eigen/Geometry>
+
+#include <yaml-cpp/yaml.h>
+
+#include "common.hpp"
+#include "nvjpegdecoder.hpp"
+
+struct camParams{
+    camParams() = default;
+    camParams(const YAML::Node &config, int n, std::vector<std::string> &cams_name);
+
+    int N_img;
+
+    Eigen::Quaternion<float> ego2global_rot;
+    Eigen::Translation3f ego2global_trans;
+
+    Eigen::Quaternion<float> lidar2ego_rot;
+    Eigen::Translation3f lidar2ego_trans;
+    // 
+    std::vector<Eigen::Matrix3f> cams_intrin;
+    std::vector<Eigen::Quaternion<float>> cams2ego_rot;
+    std::vector<Eigen::Translation3f> cams2ego_trans;
+    //
+    std::vector<std::string> imgs_file;
+
+    unsigned long long timestamp;
+    std::string scene_token;
+};
+
+struct camsData{
+    camsData() = default;
+    camsData(const camParams &_param) : param(_param), imgs_dev(nullptr){};
+    camParams param;
+    uchar* imgs_dev;
+};
+
+
+class DataLoader{
+public:
+    DataLoader() = default;
+    DataLoader(int _n_img, 
+               int _h, 
+               int _w,
+               const std::string &_data_infos_path,
+               const std::vector<std::string> &_cams_name,
+               bool _sep=true);
+
+    const std::vector<Eigen::Matrix3f>& get_cams_intrin() const{
+        return cams_intrin;
+    }
+    const std::vector<Eigen::Quaternion<float>>& get_cams2ego_rot() const{
+        return cams2ego_rot;
+    }
+    const std::vector<Eigen::Translation3f>& get_cams2ego_trans() const{
+        return cams2ego_trans;
+    }
+
+    const Eigen::Quaternion<float>& get_lidar2ego_rot() const{
+        return lidar2ego_rot;
+    }
+
+    const Eigen::Translation3f& get_lidar2ego_trans() const{
+        return lidar2ego_trans;
+    }
+
+    int size(){
+        return sample_num;
+    }
+
+    const camsData& data(int idx, bool time_order=true);
+    ~DataLoader();
+
+private:
+    std::vector<int> time_sequence;
+    std::string data_infos_path;
+    int sample_num;
+
+    std::vector<std::string> cams_name;
+    int n_img;
+    int img_h;
+    int img_w;
+
+    std::vector<camParams> cams_param; 
+    camsData cams_data;
+
+    std::vector<Eigen::Matrix3f> cams_intrin;
+    std::vector<Eigen::Quaternion<float>> cams2ego_rot;
+    std::vector<Eigen::Translation3f> cams2ego_trans;
+    Eigen::Quaternion<float> lidar2ego_rot;
+    Eigen::Translation3f lidar2ego_trans;
+
+#ifdef __HAVE_NVJPEG__
+    nvjpegDecoder nvdecoder;
+#endif
+    uchar *imgs_dev;
+    std::vector<std::vector<char>> imgs_data;
+    bool separate;
+
+};
+
+
+Eigen::Translation3f fromYamlTrans(YAML::Node x);
+Eigen::Quaternion<float> fromYamlQuater(YAML::Node x);
+Eigen::Matrix3f fromYamlMatrix3f(YAML::Node x);
+
+
+int read_image(std::string &image_names, std::vector<char> &raw_data);
+
+int read_sample(std::vector<std::string> &imgs_file, 
+                std::vector<std::vector<char>> &imgs_data);
+
+#endif
\ No newline at end of file
diff --git a/perception/tensorrt_bevdet/include/gatherbev_plugin.hpp b/perception/tensorrt_bevdet/include/gatherbev_plugin.hpp
new file mode 100644
index 0000000000000..a3d22aacccac8
--- /dev/null
+++ b/perception/tensorrt_bevdet/include/gatherbev_plugin.hpp
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <NvInfer.h>
+#include <cmath>
+#include <map>
+#include <string>
+#include <unistd.h>
+#include <vector>
+#include <iostream>
+
+namespace
+{
+static const char *GATHERBEV_PLUGIN_NAME {"GatherBEV"};
+static const char *GATHERBEV_PLUGIN_VERSION {"1"};
+} // namespace
+
+namespace nvinfer1
+{
+class GatherBEVPlugin : public IPluginV2DynamicExt
+{
+private:
+    const std::string name_;
+    std::string       namespace_;
+    struct{
+    }m_;
+
+public:
+    GatherBEVPlugin() = delete;
+    GatherBEVPlugin(const std::string &name);
+    GatherBEVPlugin(const std::string &name, const void *buffer, size_t length);
+    ~GatherBEVPlugin();
+
+    // Method inherited from IPluginV2
+    const char *getPluginType() const noexcept override;
+    const char *getPluginVersion() const noexcept override;
+    int32_t     getNbOutputs() const noexcept override;
+    int32_t     initialize() noexcept override;
+    void        terminate() noexcept override;
+    size_t      getSerializationSize() const noexcept override;
+    void        serialize(void *buffer) const noexcept override;
+    void        destroy() noexcept override;
+    void        setPluginNamespace(const char *pluginNamespace) noexcept override;
+    const char *getPluginNamespace() const noexcept override;
+
+    // Method inherited from IPluginV2Ext
+    DataType getOutputDataType(int32_t index, DataType const *inputTypes, int32_t nbInputs) const noexcept override;
+    void     attachToContext(cudnnContext *contextCudnn, cublasContext *contextCublas, IGpuAllocator *gpuAllocator) noexcept override;
+    void     detachFromContext() noexcept override;
+
+    // Method inherited from IPluginV2DynamicExt
+    IPluginV2DynamicExt *clone() const noexcept override;
+    DimsExprs            getOutputDimensions(int32_t outputIndex, const DimsExprs *inputs, int32_t nbInputs, IExprBuilder &exprBuilder) noexcept override;
+    bool                 supportsFormatCombination(int32_t pos, const PluginTensorDesc *inOut, int32_t nbInputs, int32_t nbOutputs) noexcept override;
+    void                 configurePlugin(const DynamicPluginTensorDesc *in, int32_t nbInputs, const DynamicPluginTensorDesc *out, int32_t nbOutputs) noexcept override;
+    size_t               getWorkspaceSize(const PluginTensorDesc *inputs, int32_t nbInputs, const PluginTensorDesc *outputs, int32_t nbOutputs) const noexcept override;
+    int32_t              enqueue(const PluginTensorDesc *inputDesc, const PluginTensorDesc *outputDesc, const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept override;
+
+protected:
+    // To prevent compiler warnings
+    using nvinfer1::IPluginV2::enqueue;
+    using nvinfer1::IPluginV2::getOutputDimensions;
+    using nvinfer1::IPluginV2::getWorkspaceSize;
+    using nvinfer1::IPluginV2Ext::configurePlugin;
+};
+
+class GatherBEVPluginCreator : public IPluginCreator
+{
+private:
+    static PluginFieldCollection    fc_;
+    static std::vector<PluginField> attr_;
+    std::string                     namespace_;
+
+public:
+    GatherBEVPluginCreator();
+    ~GatherBEVPluginCreator();
+    const char *                 getPluginName() const noexcept override;
+    const char *                 getPluginVersion() const noexcept override;
+    const PluginFieldCollection *getFieldNames() noexcept override;
+    IPluginV2DynamicExt *        createPlugin(const char *name, const PluginFieldCollection *fc) noexcept override;
+    IPluginV2DynamicExt *        deserializePlugin(const char *name, const void *serialData, size_t serialLength) noexcept override;
+    void                         setPluginNamespace(const char *pluginNamespace) noexcept override;
+    const char *                 getPluginNamespace() const noexcept override;
+};
+
+} // namespace nvinfer1
diff --git a/perception/tensorrt_bevdet/include/iou3d_nms.hpp b/perception/tensorrt_bevdet/include/iou3d_nms.hpp
new file mode 100644
index 0000000000000..5907e966e0927
--- /dev/null
+++ b/perception/tensorrt_bevdet/include/iou3d_nms.hpp
@@ -0,0 +1,60 @@
+/*
+3D IoU Calculation and Rotated NMS(modified from 2D NMS written by others)
+Written by Shaoshuai Shi
+All Rights Reserved 2019-2022.
+*/
+#pragma once
+
+#include <stdio.h>
+#include <thrust/count.h>
+#include <thrust/device_vector.h>
+#include <thrust/gather.h>
+#include <thrust/host_vector.h>
+#include <thrust/sequence.h>
+#include <thrust/sort.h>
+#include <thrust/transform.h>
+
+#include <vector>
+#include <iostream>
+#include "common.hpp"
+
+
+const int THREADS_PER_BLOCK = 16;
+const int THREADS_PER_BLOCK_NMS = sizeof(unsigned long long) * 8;
+const float EPS = 1e-8;
+
+class Iou3dNmsCuda {
+ public:
+  Iou3dNmsCuda(const int head_x_size,
+               const int head_y_size,
+               const float nms_overlap_thresh);
+  ~Iou3dNmsCuda() = default;
+
+  int DoIou3dNms(const int box_num_pre,
+                 const float* res_box,
+                 const int* res_sorted_indices,
+                 long* host_keep_data);
+
+ private:
+  const int head_x_size_;
+  const int head_y_size_;
+  const float nms_overlap_thresh_;
+};
+
+
+static const int kBoxBlockSize = 9;  
+
+struct Box {
+  float x;
+  float y;
+  float z;
+  float l;
+  float w;
+  float h;
+  float r;
+  float vx = 0.0f;  // optional
+  float vy = 0.0f;  // optional
+  float score;
+  int label;
+  bool is_drop;  // for nms
+};
\ No newline at end of file
diff --git a/perception/tensorrt_bevdet/include/nvjpegdecoder.hpp b/perception/tensorrt_bevdet/include/nvjpegdecoder.hpp
new file mode 100644
index 0000000000000..be0fcbd995074
--- /dev/null
+++ b/perception/tensorrt_bevdet/include/nvjpegdecoder.hpp
@@ -0,0 +1,101 @@
+#ifndef  __NVJPEGDECODER__
+#define __NVJPEGDECODER__
+
+#ifdef __HAVE_NVJPEG__
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <string>
+#include <vector>
+#include <algorithm>
+
+#include <string.h>
+
+#include <dirent.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <nvjpeg.h>
+#include <thread>
+#include "common.hpp"
+
+
+#define DECODE_RGB 3
+#define DECODE_BGR 4
+#define DECODE_RGBI 5
+#define DECODE_BGRI 6
+#define DECODE_UNCHANGED 0
+
+
+#define CHECK_NVJPEG(call) { NvJpegAssert((call), __FILE__, __LINE__); }
+
+inline void NvJpegAssert(nvjpegStatus_t code, const char *file, int line){
+    if(code != NVJPEG_STATUS_SUCCESS){
+        std::cout << "NVJPEG failure: '#" << code << "' at " << std::string(file) << ":" << line << std::endl; 
+        exit(1);       
+    }
+}
+
+
+struct decode_params_t{
+    decode_params_t(){}
+
+    cudaStream_t stream;
+
+    // used with decoupled API
+    nvjpegBufferPinned_t pinned_buffers[2]; // 2 buffers for pipelining
+    nvjpegBufferDevice_t device_buffer;
+    nvjpegJpegStream_t jpeg_streams[2]; //  2 streams for pipelining
+
+    nvjpegDecodeParams_t nvjpeg_decode_params;
+    nvjpegJpegDecoder_t nvjpeg_decoder;
+    nvjpegJpegState_t nvjpeg_decoupled_state;
+
+};
+
+struct share_params{
+    share_params(){
+        fmt = NVJPEG_OUTPUT_UNCHANGED;
+    }
+    share_params(size_t _fmt){
+        if(_fmt == DECODE_RGB || _fmt == DECODE_BGR || _fmt == DECODE_RGBI ||
+             _fmt == DECODE_BGRI || _fmt == DECODE_UNCHANGED){
+            fmt = (nvjpegOutputFormat_t)_fmt;
+        }
+        else{
+            std::cerr << "Unknown format! Auto switch BGR!" << std::endl;
+            fmt = NVJPEG_OUTPUT_BGR;
+        }
+    }
+
+    nvjpegOutputFormat_t fmt;
+    nvjpegJpegState_t nvjpeg_state;
+    nvjpegHandle_t nvjpeg_handle;
+
+    bool hw_decode_available;
+};
+
+
+class nvjpegDecoder{
+public:
+    nvjpegDecoder(){}
+    nvjpegDecoder(size_t n, size_t _fmt);
+
+    int decode(const std::vector<std::vector<char>> &imgs_data, uchar* out_imgs);
+    int init();
+    ~nvjpegDecoder();
+private:
+    size_t N_img;
+    std::vector<nvjpegImage_t> iout;
+    std::vector<nvjpegImage_t> isz;
+
+    std::vector<int> widths;
+    std::vector<int> heights;
+
+    share_params share_param;
+    std::vector<decode_params_t> params;
+
+};
+
+#endif
+
+#endif
diff --git a/perception/tensorrt_bevdet/include/postprocess.hpp b/perception/tensorrt_bevdet/include/postprocess.hpp
new file mode 100644
index 0000000000000..f89aef2057c0a
--- /dev/null
+++ b/perception/tensorrt_bevdet/include/postprocess.hpp
@@ -0,0 +1,68 @@
+#ifndef __POSTPROCESS_HPP__
+#define __POSTPROCESS_HPP__
+
+#include <vector>
+
+#include "iou3d_nms.hpp"
+#include "common.hpp"
+
+
+class PostprocessGPU{
+public:
+    PostprocessGPU(){};
+    PostprocessGPU(const int _class_num, 
+                    const float _score_thresh,
+                    const float _nms_thresh, 
+                    const int _nms_pre_maxnum,
+                    const int _nms_post_maxnum, 
+                    const int _down_sample, 
+                    const int _output_h, 
+                    const int _output_w, 
+                    const float _x_step, 
+                    const float _y_step,
+                    const float _x_start, 
+                    const float _y_start,
+                    const std::vector<int>& _class_num_pre_task,
+                    const std::vector<float>& _nms_rescale_factor);
+
+    void DoPostprocess(void ** const bev_buffer, std::vector<Box>& out_detections);
+    ~PostprocessGPU();
+
+
+private:
+    int class_num;
+    float score_thresh;
+    float nms_thresh;
+    int nms_pre_maxnum;
+    int nms_post_maxnum;
+    int down_sample;
+    int output_h;
+    int output_w;
+    float x_step;
+    float y_step;
+    float x_start;
+    float y_start;
+    int map_size;
+    int task_num;
+
+    std::vector<int> class_num_pre_task;
+    std::vector<float> nms_rescale_factor;
+
+    std::unique_ptr<Iou3dNmsCuda> iou3d_nms;
+
+    float* boxes_dev = nullptr;
+    float* score_dev = nullptr;
+    int* cls_dev = nullptr;
+    int* valid_box_num = nullptr;
+    int* sorted_indices_dev = nullptr;
+    long* keep_data_host = nullptr;
+    int* sorted_indices_host = nullptr;
+    float* boxes_host = nullptr;
+    float* score_host = nullptr;
+    int* cls_host = nullptr;
+
+    float* nms_rescale_factor_dev = nullptr;
+
+};
+
+#endif
\ No newline at end of file
diff --git a/perception/tensorrt_bevdet/include/preprocess.hpp b/perception/tensorrt_bevdet/include/preprocess.hpp
new file mode 100644
index 0000000000000..68870bf0b5438
--- /dev/null
+++ b/perception/tensorrt_bevdet/include/preprocess.hpp
@@ -0,0 +1,9 @@
+#ifndef __PREPROCESS_HPP__
+#define __PREPROCESS_HPP__
+
+#include "common.hpp"
+
+void convert_RGBHWC_to_BGRCHW(uchar *input, uchar *output, 
+                                                        int channels, int height, int width);
+
+#endif
\ No newline at end of file
diff --git a/perception/tensorrt_bevdet/include/preprocess_plugin.hpp b/perception/tensorrt_bevdet/include/preprocess_plugin.hpp
new file mode 100644
index 0000000000000..5aa37c26f98e5
--- /dev/null
+++ b/perception/tensorrt_bevdet/include/preprocess_plugin.hpp
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <NvInfer.h>
+#include <cmath>
+#include <string>
+#include <vector>
+#include <unistd.h>
+#include <iostream>
+
+
+namespace
+{
+static const char *PRE_PLUGIN_NAME {"Preprocess"};
+static const char *PRE_PLUGIN_VERSION {"1"};
+} // namespace
+
+namespace nvinfer1
+{
+class PreprocessPlugin : public IPluginV2DynamicExt
+{
+private:
+    const std::string name_;
+    std::string       namespace_;
+    struct
+    {
+        int crop_h;
+        int crop_w;
+        float resize_radio;
+    } m_;
+
+public:
+    PreprocessPlugin() = delete;
+    PreprocessPlugin(const std::string &name, int crop_h, int crop_w, float resize_radio);
+    PreprocessPlugin(const std::string &name, const void *buffer, size_t length);
+    ~PreprocessPlugin();
+
+    // Method inherited from IPluginV2
+    const char *getPluginType() const noexcept override;
+    const char *getPluginVersion() const noexcept override;
+    int32_t     getNbOutputs() const noexcept override;
+    int32_t     initialize() noexcept override;
+    void        terminate() noexcept override;
+    size_t      getSerializationSize() const noexcept override;
+    void        serialize(void *buffer) const noexcept override;
+    void        destroy() noexcept override;
+    void        setPluginNamespace(const char *pluginNamespace) noexcept override;
+    const char *getPluginNamespace() const noexcept override;
+
+    // Method inherited from IPluginV2Ext
+    DataType getOutputDataType(int32_t index, DataType const *inputTypes, int32_t nbInputs) const noexcept override;
+    void     attachToContext(cudnnContext *contextCudnn, cublasContext *contextCublas, IGpuAllocator *gpuAllocator) noexcept override;
+    void     detachFromContext() noexcept override;
+
+    // Method inherited from IPluginV2DynamicExt
+    IPluginV2DynamicExt *clone() const noexcept override;
+    DimsExprs            getOutputDimensions(int32_t outputIndex, const DimsExprs *inputs, int32_t nbInputs, IExprBuilder &exprBuilder) noexcept override;
+    bool                 supportsFormatCombination(int32_t pos, const PluginTensorDesc *inOut, int32_t nbInputs, int32_t nbOutputs) noexcept override;
+    void                 configurePlugin(const DynamicPluginTensorDesc *in, int32_t nbInputs, const DynamicPluginTensorDesc *out, int32_t nbOutputs) noexcept override;
+    size_t               getWorkspaceSize(const PluginTensorDesc *inputs, int32_t nbInputs, const PluginTensorDesc *outputs, int32_t nbOutputs) const noexcept override;
+    int32_t              enqueue(const PluginTensorDesc *inputDesc, const PluginTensorDesc *outputDesc, const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept override;
+
+protected:
+    // To prevent compiler warnings
+    using nvinfer1::IPluginV2::enqueue;
+    using nvinfer1::IPluginV2::getOutputDimensions;
+    using nvinfer1::IPluginV2::getWorkspaceSize;
+    using nvinfer1::IPluginV2Ext::configurePlugin;
+};
+
+class PreprocessPluginCreator : public IPluginCreator
+{
+private:
+    static PluginFieldCollection    fc_;
+    static std::vector<PluginField> attr_;
+    std::string                     namespace_;
+
+public:
+    PreprocessPluginCreator();
+    ~PreprocessPluginCreator();
+    const char *                 getPluginName() const noexcept override;
+    const char *                 getPluginVersion() const noexcept override;
+    const PluginFieldCollection *getFieldNames() noexcept override;
+    IPluginV2DynamicExt *        createPlugin(const char *name, const PluginFieldCollection *fc) noexcept override;
+    IPluginV2DynamicExt *        deserializePlugin(const char *name, const void *serialData, size_t serialLength) noexcept override;
+    void                         setPluginNamespace(const char *pluginNamespace) noexcept override;
+    const char *                 getPluginNamespace() const noexcept override;
+};
+
+} // namespace nvinfer1
diff --git a/perception/tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml b/perception/tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml
new file mode 100644
index 0000000000000..494f812e5d43e
--- /dev/null
+++ b/perception/tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml
@@ -0,0 +1,40 @@
+<?xml version="1.0"?>
+<launch>
+    <!-- input topic name -->
+    <arg name="input/pointcloud" default="/lidar_top"/>
+    <arg name="input/img_fl" default="/cam_front_left/raw"/>
+    <arg name="input/img_f" default="/cam_front/raw"/>
+    <arg name="input/img_fr" default="/cam_front_right/raw"/>
+    <arg name="input/img_bl" default="/cam_back_left/raw"/>
+    <arg name="input/img_b" default="/cam_back/raw"/>
+    <arg name="input/img_br" default="/cam_back_right/raw"/>
+    <arg name="output/pointcloud" default="points_painting"/>
+    <arg name="output/boxes" default="boxes"/>
+
+    <arg name="onnx_file" default="$(find-pkg-share trt_bevdet)/data/bevdet_one_lt_d.onnx"/>
+    <arg name="engine_file" default="$(find-pkg-share trt_bevdet)/data/bevdet_one_lt_d.engine"/>
+    <arg name="cam_config" default="$(find-pkg-share trt_bevdet)/config/cams_param.yaml"/>
+    <arg name="model_config" default="$(find-pkg-share trt_bevdet)/config/bevdet_r50_4dlongterm_depth.yaml"/>
+    <arg name="param_file" default="$(find-pkg-share trt_bevdet)/config/bevdet.param.yaml"/>
+
+    <!-- bevdet node -->
+    <node pkg="tensorrt_bevdet" exec="tensorrt_bevdet_node" name="tensorrt_bevdet_node" output="screen">
+    <!-- ros topic name -->
+    <remap from="~/input/topic_cloud"  to="$(var input/pointcloud)" />
+    <remap from="~/input/topic_img_fl" to="$(var input/img_fl)" />
+    <remap from="~/input/topic_img_f"  to="$(var input/img_f)" />
+    <remap from="~/input/topic_img_fr" to="$(var input/img_fr)" />
+    <remap from="~/input/topic_img_bl" to="$(var input/img_bl)" />
+    <remap from="~/input/topic_img_b"  to="$(var input/img_b)" />
+    <remap from="~/input/topic_img_br" to="$(var input/img_br)" />
+
+    <remap from="~/output/painting_cloud" to="$(var output/pointcloud)" />
+    <remap from="~/output/boxes" to="$(var output/boxes)" />
+
+    <param name="onnx_file" value="$(var onnx_file)" />
+    <param name="engine_file" value="$(var engine_file)" />
+    <param name="cam_config" value="$(var cam_config)" />
+    <param name="model_config" value="$(var model_config)" />
+    <param from="$(var param_file)"/>
+    </node>
+</launch>
\ No newline at end of file
diff --git a/perception/tensorrt_bevdet/package.xml b/perception/tensorrt_bevdet/package.xml
new file mode 100644
index 0000000000000..069a41ae31318
--- /dev/null
+++ b/perception/tensorrt_bevdet/package.xml
@@ -0,0 +1,40 @@
+<?xml version="1.0"?>
+<package format="3">
+  <name>tensorrt_bevdet</name>
+  <version>0.0.1</version>
+  <description>tensorrt library implementation for bevdet</description>
+
+  <author email="cynthia.liu@autocore.ai">Cynthia</author>
+  <maintainer email="cynthia.liu@autocore.ai">Cynthia</maintainer>
+
+  <license>Apache License 2.0</license>
+
+  <buildtool_depend>ament_cmake_auto</buildtool_depend>
+  <buildtool_depend>autoware_cmake</buildtool_depend>
+  <buildtool_depend>cudnn_cmake_module</buildtool_depend>
+  <buildtool_depend>tensorrt_cmake_module</buildtool_depend>
+
+  <buildtool_export_depend>cudnn_cmake_module</buildtool_export_depend>
+  <buildtool_export_depend>tensorrt_cmake_module</buildtool_export_depend>
+
+  <depend>autoware_perception_msgs</depend>
+  <depend>yaml-cpp</depend>
+  <depend>cv_bridge</depend>
+  <depend>libopencv-dev</depend>
+  <depend>pcl_conversions</depend>
+  <depend>pcl_ros</depend>
+  <depend>rclcpp</depend>
+  <depend>rclcpp_components</depend>
+  <depend>sensor_msgs</depend>
+  <depend>geometry_msgs</depend>
+  <depend>tf2_geometry_msgs</depend>
+  <depend>tensorrt_common</depend>
+
+
+  <test_depend>ament_lint_auto</test_depend>
+  <test_depend>autoware_lint_common</test_depend>
+
+  <export>
+    <build_type>ament_cmake</build_type>
+  </export>
+</package>
\ No newline at end of file
diff --git a/perception/tensorrt_bevdet/src/alignbev_plugin.cu b/perception/tensorrt_bevdet/src/alignbev_plugin.cu
new file mode 100644
index 0000000000000..a7a56c6b9806d
--- /dev/null
+++ b/perception/tensorrt_bevdet/src/alignbev_plugin.cu
@@ -0,0 +1,334 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "alignbev_plugin.hpp"
+#include "common.hpp"
+
+#include <cuda_fp16.h>
+#include <cuda_runtime_api.h>
+
+
+static inline __device__ bool within_bounds_2d(int h, int w, int H, int W){
+    return h >= 0 && h < H && w >= 0 && w < W;
+}
+
+template <typename T1, typename T2>
+__global__ void align_bev_kernel(const int nthreads, const T1 *input,
+                                       const float *trans, T2 *output,
+                                       TensorDesc output_desc){
+    int C = output_desc.shape[1];         // 80
+    int out_H = output_desc.shape[2];     // 128
+    int out_W = output_desc.shape[3];     // 128
+    int out_sN = output_desc.stride[0];   // 80 * 128 * 128
+    int out_sC = output_desc.stride[1];   // 128 * 128
+    int out_sH = output_desc.stride[2];   // 128
+    int out_sW = output_desc.stride[3];   // 1
+
+    CUDA_1D_KERNEL_LOOP(index, nthreads){
+        const int w = index % out_W;               //  j
+        const int h = (index / out_W) % out_H;     //  i
+        const int n = index / (out_H * out_W);     // batch
+
+        float ix = trans[n * 6 + 0 * 3 + 0] * w + 
+                   trans[n * 6 + 0 * 3 + 1] * h + 
+                   trans[n * 6 + 0 * 3 + 2];
+        float iy = trans[n * 6 + 1 * 3 + 0] * w + 
+                   trans[n * 6 + 1 * 3 + 1] * h + 
+                   trans[n * 6 + 1 * 3 + 2];
+
+        // NE, NW, SE, SW point
+        int ix_nw = static_cast<int>(::floor(ix));
+        int iy_nw = static_cast<int>(::floor(iy));
+        int ix_ne = ix_nw + 1;
+        int iy_ne = iy_nw;
+        int ix_sw = ix_nw;
+        int iy_sw = iy_nw + 1;
+        int ix_se = ix_nw + 1;
+        int iy_se = iy_nw + 1;
+
+        T2 nw = (ix_se - ix) * (iy_se - iy);
+        T2 ne = (ix - ix_sw) * (iy_sw - iy);
+        T2 sw = (ix_ne - ix) * (iy - iy_ne);
+        T2 se = (ix - ix_nw) * (iy - iy_nw);
+
+        // bilinear
+        auto inp_ptr_NC = input + n * out_sN;
+        auto out_ptr_NCHW = output + n * out_sN + h * out_sH + w * out_sW;
+        for (int c = 0; c < C; ++c, inp_ptr_NC += out_sC, out_ptr_NCHW += out_sC){
+            *out_ptr_NCHW = static_cast<T2>(0);
+            if (within_bounds_2d(iy_nw, ix_nw, out_H, out_W)){
+                *out_ptr_NCHW += static_cast<T2>(inp_ptr_NC[iy_nw * out_sH + ix_nw * out_sW]) * nw;
+            }
+            if (within_bounds_2d(iy_ne, ix_ne, out_H, out_W)){
+                *out_ptr_NCHW += static_cast<T2>(inp_ptr_NC[iy_ne * out_sH + ix_ne * out_sW]) * ne;
+            }
+            if (within_bounds_2d(iy_sw, ix_sw, out_H, out_W)){
+                *out_ptr_NCHW += static_cast<T2>(inp_ptr_NC[iy_sw * out_sH + ix_sw * out_sW]) * sw;
+            }
+            if (within_bounds_2d(iy_se, ix_se, out_H, out_W)){
+                *out_ptr_NCHW += static_cast<T2>(inp_ptr_NC[iy_se * out_sH + ix_se * out_sW]) * se;
+            }
+        }
+    }
+}
+
+void create_desc(const int *dims, int nb_dims, TensorDesc &desc){
+    memcpy(&desc.shape[0], dims, sizeof(int) * nb_dims);
+    desc.stride[nb_dims - 1] = 1;
+    for (int i = nb_dims - 2; i >= 0; --i){
+        desc.stride[i] = desc.stride[i + 1] * desc.shape[i + 1];
+    }
+}
+
+
+namespace nvinfer1 {
+// class AlignBEVPlugin
+AlignBEVPlugin::AlignBEVPlugin(const std::string &name):
+    name_(name){
+}
+
+AlignBEVPlugin::AlignBEVPlugin(const std::string &name, const void *buffer, size_t length):
+    name_(name){
+    memcpy(&m_, buffer, sizeof(m_));
+}
+
+AlignBEVPlugin::~AlignBEVPlugin(){
+}
+
+IPluginV2DynamicExt *AlignBEVPlugin::clone() const noexcept {
+    auto p = new AlignBEVPlugin(name_, &m_, sizeof(m_));
+    p->setPluginNamespace(namespace_.c_str());
+    return p;
+}
+
+int32_t AlignBEVPlugin::getNbOutputs() const noexcept {
+    return 1;
+}
+ 
+DataType AlignBEVPlugin::getOutputDataType(int32_t index, DataType const *inputTypes, 
+                                                                int32_t nbInputs) const noexcept {
+    return DataType::kFLOAT; // FIXME 
+}
+
+DimsExprs AlignBEVPlugin::getOutputDimensions(int32_t outputIndex, const DimsExprs *inputs, 
+                                        int32_t nbInputs, IExprBuilder &exprBuilder) noexcept {
+    
+    return inputs[0]; 
+}
+
+bool AlignBEVPlugin::supportsFormatCombination(int32_t pos, const PluginTensorDesc *inOut,
+                                                    int32_t nbInputs, int32_t nbOutputs) noexcept {
+    // adj_feat  
+    if(pos == 0){
+        return (inOut[pos].type == DataType::kFLOAT || inOut[pos].type == DataType::kHALF) &&
+                inOut[pos].format == TensorFormat::kLINEAR;
+    }    
+    else if(pos == 2){ // out
+        return (inOut[pos].type == DataType::kFLOAT || inOut[pos].type == DataType::kHALF) &&
+                inOut[pos].format == TensorFormat::kLINEAR;
+    }
+    else if(pos == 1){ // transform
+        return inOut[pos].type == DataType::kFLOAT && inOut[pos].format == TensorFormat::kLINEAR;
+    }
+    return false;
+}
+
+size_t AlignBEVPlugin::getWorkspaceSize(const PluginTensorDesc *inputs, int32_t nbInputs, 
+                                const PluginTensorDesc *outputs, int32_t nbOutputs) const noexcept {
+    return 0;
+}
+
+int32_t AlignBEVPlugin::enqueue(const PluginTensorDesc *inputDesc, const PluginTensorDesc *outputDesc,
+    const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept {
+    
+    // TODO
+    // inputs[0] == adj_feat  b x 8 x 80 x 128 x 128
+    // inputs[1] == transform b x 8 x 6
+
+    int bev_channel = inputDesc[0].dims.d[2];
+    int bev_h = inputDesc[0].dims.d[3];
+    int bev_w = inputDesc[0].dims.d[4];
+    int adj_num = inputDesc[0].dims.d[1];
+
+    int output_dim[4] = {8, bev_channel, bev_h, bev_w};
+
+    TensorDesc output_desc;
+    create_desc(output_dim, 4, output_desc);
+
+    int count = 1;
+    for (int i = 0; i < 4; ++i){
+        if (i == 1){
+            continue;
+        }
+        count *= output_desc.shape[i];
+    }
+
+    switch (int(outputDesc[0].type))
+    {
+    case int(DataType::kFLOAT):
+        if(inputDesc[0].type == DataType::kFLOAT){
+            // printf("align : fp32, fp32\n");
+            align_bev_kernel<float, float><<<GET_BLOCKS(count), NUM_THREADS, 0, stream>>>(
+                                                    count, 
+                                                    reinterpret_cast<const float *>(inputs[0]), 
+                                                    reinterpret_cast<const float*>(inputs[1]), 
+                                                    reinterpret_cast<float *>(outputs[0]), 
+                                                    output_desc);
+        }
+        else{
+            // printf("align : fp16, fp32\n");
+            align_bev_kernel<__half, float><<<GET_BLOCKS(count), NUM_THREADS, 0, stream>>>(
+                                                    count, 
+                                                    reinterpret_cast<const __half *>(inputs[0]), 
+                                                    reinterpret_cast<const float*>(inputs[1]), 
+                                                    reinterpret_cast<float *>(outputs[0]), 
+                                                    output_desc);
+        }
+        break;
+    case int(DataType::kHALF):
+        if(inputDesc[0].type == DataType::kFLOAT){
+            // printf("align : fp32, fp16\n");
+            align_bev_kernel<float, __half><<<GET_BLOCKS(count), NUM_THREADS, 0, stream>>>(
+                                                    count, 
+                                                    reinterpret_cast<const float *>(inputs[0]), 
+                                                    reinterpret_cast<const float*>(inputs[1]), 
+                                                    reinterpret_cast<__half *>(outputs[0]), 
+                                                    output_desc);
+        }
+        else{
+            // printf("align : fp16, fp16\n");
+            align_bev_kernel<__half, __half><<<GET_BLOCKS(count), NUM_THREADS, 0, stream>>>(
+                                                    count, 
+                                                    reinterpret_cast<const __half *>(inputs[0]), 
+                                                    reinterpret_cast<const float*>(inputs[1]), 
+                                                    reinterpret_cast<__half *>(outputs[0]), 
+                                                    output_desc);
+        }
+        break;
+    default: // should NOT be here
+        printf("\tUnsupport datatype!\n");
+    }
+
+
+    return 0;
+}
+
+void AlignBEVPlugin::destroy() noexcept {
+    delete this;
+    return;
+}
+
+void AlignBEVPlugin::configurePlugin(const DynamicPluginTensorDesc *in, int32_t nbInputs, 
+                                    const DynamicPluginTensorDesc *out, int32_t nbOutputs) noexcept {
+    return;
+}
+
+int32_t AlignBEVPlugin::initialize() noexcept {
+    return 0;
+}
+
+void AlignBEVPlugin::terminate() noexcept {
+    return;
+}
+
+size_t AlignBEVPlugin::getSerializationSize() const noexcept {
+    return sizeof(m_);
+}
+
+void AlignBEVPlugin::serialize(void *buffer) const noexcept {
+    memcpy(buffer, &m_, sizeof(m_));
+    return;
+}
+
+void AlignBEVPlugin::setPluginNamespace(const char *pluginNamespace) noexcept {
+    namespace_ = pluginNamespace;
+    return;
+}
+
+const char *AlignBEVPlugin::getPluginNamespace() const noexcept {
+    return namespace_.c_str();
+}
+
+const char *AlignBEVPlugin::getPluginType() const noexcept {
+    return ALIGN_PLUGIN_NAME;
+}
+
+const char *AlignBEVPlugin::getPluginVersion() const noexcept {
+    return ALIGN_PLUGIN_VERSION;
+}
+
+void AlignBEVPlugin::attachToContext(cudnnContext *contextCudnn, cublasContext *contextCublas, 
+                                                        IGpuAllocator *gpuAllocator) noexcept {
+    return;
+}
+
+void AlignBEVPlugin::detachFromContext() noexcept {
+    return;
+}
+
+// class AlignBEVPluginCreator
+PluginFieldCollection    AlignBEVPluginCreator::fc_ {};
+std::vector<PluginField> AlignBEVPluginCreator::attr_;
+
+AlignBEVPluginCreator::AlignBEVPluginCreator() {
+    attr_.clear();
+    fc_.nbFields = attr_.size();
+    fc_.fields   = attr_.data();
+}
+
+AlignBEVPluginCreator::~AlignBEVPluginCreator() {
+}
+
+
+IPluginV2DynamicExt *AlignBEVPluginCreator::createPlugin(const char *name, 
+                                    const PluginFieldCollection *fc) noexcept {
+    // const PluginField *fields = fc->fields;
+    AlignBEVPlugin *pObj = new AlignBEVPlugin(name);
+    pObj->setPluginNamespace(namespace_.c_str());
+    return pObj;
+}
+
+IPluginV2DynamicExt *AlignBEVPluginCreator::deserializePlugin(const char *name, 
+                                        const void *serialData, size_t serialLength) noexcept {
+    AlignBEVPlugin *pObj = new AlignBEVPlugin(name, serialData, serialLength);
+    pObj->setPluginNamespace(namespace_.c_str());
+    return pObj;
+}
+
+void AlignBEVPluginCreator::setPluginNamespace(const char *pluginNamespace) noexcept {
+    namespace_ = pluginNamespace;
+    return;
+}
+
+const char *AlignBEVPluginCreator::getPluginNamespace() const noexcept {
+    return namespace_.c_str();
+}
+
+const char *AlignBEVPluginCreator::getPluginName() const noexcept {
+    return ALIGN_PLUGIN_NAME;
+}
+
+const char *AlignBEVPluginCreator::getPluginVersion() const noexcept {
+    return ALIGN_PLUGIN_VERSION;
+}
+
+const PluginFieldCollection *AlignBEVPluginCreator::getFieldNames() noexcept {
+    return &fc_;
+}
+
+REGISTER_TENSORRT_PLUGIN(AlignBEVPluginCreator);
+
+} // namespace nvinfer1
diff --git a/perception/tensorrt_bevdet/src/bevdet.cpp b/perception/tensorrt_bevdet/src/bevdet.cpp
new file mode 100644
index 0000000000000..8d50bbcbf92c4
--- /dev/null
+++ b/perception/tensorrt_bevdet/src/bevdet.cpp
@@ -0,0 +1,757 @@
+#include <iostream>
+#include <cstdio>
+#include <fstream>
+#include <chrono>
+
+#include <thrust/sort.h>
+#include <thrust/functional.h>
+
+#include "bevdet.hpp"
+
+#include "alignbev_plugin.hpp"
+#include "bevpool_plugin.hpp"
+#include "preprocess_plugin.hpp"
+#include "gatherbev_plugin.hpp"
+
+using std::chrono::duration;
+using std::chrono::high_resolution_clock;
+
+
+BEVDet::BEVDet(const std::string &config_file, int n_img,               
+               std::vector<Eigen::Matrix3f> _cams_intrin, 
+               std::vector<Eigen::Quaternion<float>> _cams2ego_rot, 
+               std::vector<Eigen::Translation3f> _cams2ego_trans,
+               const std::string &onnx_file,
+               const std::string &engine_file) : 
+               cams_intrin(_cams_intrin), 
+               cams2ego_rot(_cams2ego_rot), 
+               cams2ego_trans(_cams2ego_trans){
+
+    InitParams(config_file);
+    if(n_img != N_img){
+        printf("BEVDet need %d images, but given %d images!", N_img, n_img);
+    }
+    auto start = high_resolution_clock::now();
+
+    std::shared_ptr<int> ranks_bev_ptr = nullptr;
+    std::shared_ptr<int> ranks_depth_ptr = nullptr;
+    std::shared_ptr<int> ranks_feat_ptr = nullptr;
+    std::shared_ptr<int> interval_starts_ptr = nullptr;
+    std::shared_ptr<int> interval_lengths_ptr = nullptr;
+
+
+    InitViewTransformer(ranks_bev_ptr, ranks_depth_ptr, ranks_feat_ptr, 
+                                interval_starts_ptr, interval_lengths_ptr);
+    auto end = high_resolution_clock::now();
+    duration<float> t = end - start;
+    printf("InitVewTransformer cost time : %.4lf ms\n", t.count() * 1000);
+
+    if (access(engine_file.c_str(), F_OK) == 0)
+    {
+        printf("Inference engine prepared.");
+    } 
+    else
+    {
+        //onnx to engine
+        printf("Could not find %s, try making TensorRT engine from onnx", engine_file.c_str());
+        ExportEngine(onnx_file, engine_file);
+    }
+    InitEngine(engine_file); // FIXME
+    MallocDeviceMemory();
+
+    if(use_adj){
+        adj_frame_ptr.reset(new adjFrame(adj_num, 
+                                        trt_buffer_sizes[buffer_map["curr_bevfeat"]]));
+    }
+
+    cam_params_host = new float[N_img * cam_params_size];
+    
+    CHECK_CUDA(cudaMemcpy(trt_buffer_dev[buffer_map["ranks_bev"]], ranks_bev_ptr.get(), 
+                                        valid_feat_num * sizeof(int), cudaMemcpyHostToDevice));
+    CHECK_CUDA(cudaMemcpy(trt_buffer_dev[buffer_map["ranks_depth"]], ranks_depth_ptr.get(), 
+                                        valid_feat_num * sizeof(int), cudaMemcpyHostToDevice));
+    CHECK_CUDA(cudaMemcpy(trt_buffer_dev[buffer_map["ranks_feat"]], ranks_feat_ptr.get(), 
+                                        valid_feat_num * sizeof(int), cudaMemcpyHostToDevice));
+    CHECK_CUDA(cudaMemcpy(trt_buffer_dev[buffer_map["interval_starts"]], interval_starts_ptr.get(), 
+                                        unique_bev_num * sizeof(int), cudaMemcpyHostToDevice));
+    CHECK_CUDA(cudaMemcpy(trt_buffer_dev[buffer_map["interval_lengths"]], interval_lengths_ptr.get(), 
+                                        unique_bev_num * sizeof(int), cudaMemcpyHostToDevice));
+
+    CHECK_CUDA(cudaMemcpy(trt_buffer_dev[buffer_map["mean"]], mean.data(), 3 * sizeof(float),
+                                                                    cudaMemcpyHostToDevice));
+    CHECK_CUDA(cudaMemcpy(trt_buffer_dev[buffer_map["std"]], std.data(), 3 * sizeof(float),
+                                                                    cudaMemcpyHostToDevice));                                                     
+}
+
+
+
+void BEVDet::InitCamParams(const std::vector<Eigen::Quaternion<float>> &curr_cams2ego_rot,
+                           const std::vector<Eigen::Translation3f> &curr_cams2ego_trans,
+                           const std::vector<Eigen::Matrix3f> &curr_cams_intrin){
+    for(int i = 0; i < N_img; i++){
+        cam_params_host[i * cam_params_size +  0] = curr_cams_intrin[i](0, 0);
+        cam_params_host[i * cam_params_size +  1] = curr_cams_intrin[i](1, 1);
+        cam_params_host[i * cam_params_size +  2] = curr_cams_intrin[i](0, 2);
+        cam_params_host[i * cam_params_size +  3] = curr_cams_intrin[i](1, 2);
+        cam_params_host[i * cam_params_size +  4] = post_rot(0, 0);
+        cam_params_host[i * cam_params_size +  5] = post_rot(0, 1);
+        cam_params_host[i * cam_params_size +  6] = post_trans.translation()(0);
+        cam_params_host[i * cam_params_size +  7] = post_rot(1, 0);
+        cam_params_host[i * cam_params_size +  8] = post_rot(1, 1);
+        cam_params_host[i * cam_params_size +  9] = post_trans.translation()(1);
+        cam_params_host[i * cam_params_size + 10] = 1.f; // bda 0 0
+        cam_params_host[i * cam_params_size + 11] = 0.f; // bda 0 1
+        cam_params_host[i * cam_params_size + 12] = 0.f; // bda 1 0
+        cam_params_host[i * cam_params_size + 13] = 1.f; // bda 1 1
+        cam_params_host[i * cam_params_size + 14] = 1.f; // bda 2 2 
+        cam_params_host[i * cam_params_size + 15] = curr_cams2ego_rot[i].matrix()(0, 0);
+        cam_params_host[i * cam_params_size + 16] = curr_cams2ego_rot[i].matrix()(0, 1);
+        cam_params_host[i * cam_params_size + 17] = curr_cams2ego_rot[i].matrix()(0, 2);
+        cam_params_host[i * cam_params_size + 18] = curr_cams2ego_trans[i].translation()(0);
+        cam_params_host[i * cam_params_size + 19] = curr_cams2ego_rot[i].matrix()(1, 0);
+        cam_params_host[i * cam_params_size + 20] = curr_cams2ego_rot[i].matrix()(1, 1);
+        cam_params_host[i * cam_params_size + 21] = curr_cams2ego_rot[i].matrix()(1, 2);
+        cam_params_host[i * cam_params_size + 22] = curr_cams2ego_trans[i].translation()(1);
+        cam_params_host[i * cam_params_size + 23] = curr_cams2ego_rot[i].matrix()(2, 0);
+        cam_params_host[i * cam_params_size + 24] = curr_cams2ego_rot[i].matrix()(2, 1);
+        cam_params_host[i * cam_params_size + 25] = curr_cams2ego_rot[i].matrix()(2, 2);
+        cam_params_host[i * cam_params_size + 26] = curr_cams2ego_trans[i].translation()(2);
+    }
+    CHECK_CUDA(cudaMemcpy(trt_buffer_dev[buffer_map["cam_params"]], cam_params_host, 
+                        trt_buffer_sizes[buffer_map["cam_params"]], cudaMemcpyHostToDevice));
+    // printf("trans : %d cam : %d\n", transform_size, cam_params_size);
+}
+
+
+void BEVDet::InitParams(const std::string &config_file){
+    mean = std::vector<float>(3);
+    std = std::vector<float>(3);
+
+    YAML::Node model_config = YAML::LoadFile(config_file);
+    N_img = model_config["data_config"]["Ncams"].as<int>();
+    src_img_h = model_config["data_config"]["src_size"][0].as<int>();
+    src_img_w = model_config["data_config"]["src_size"][1].as<int>();
+    input_img_h = model_config["data_config"]["input_size"][0].as<int>();
+    input_img_w = model_config["data_config"]["input_size"][1].as<int>();
+    crop_h = model_config["data_config"]["crop"][0].as<int>();
+    crop_w = model_config["data_config"]["crop"][1].as<int>();
+    mean[0] = model_config["mean"][0].as<float>();
+    mean[1] = model_config["mean"][1].as<float>();
+    mean[2] = model_config["mean"][2].as<float>();
+    std[0] = model_config["std"][0].as<float>();
+    std[1] = model_config["std"][1].as<float>();
+    std[2] = model_config["std"][2].as<float>();
+    down_sample = model_config["model"]["down_sample"].as<int>();
+    depth_start = model_config["grid_config"]["depth"][0].as<float>();
+    depth_end = model_config["grid_config"]["depth"][1].as<float>();
+    depth_step = model_config["grid_config"]["depth"][2].as<float>();
+    x_start = model_config["grid_config"]["x"][0].as<float>();
+    x_end = model_config["grid_config"]["x"][1].as<float>();
+    x_step = model_config["grid_config"]["x"][2].as<float>();
+    y_start = model_config["grid_config"]["y"][0].as<float>();
+    y_end = model_config["grid_config"]["y"][1].as<float>();
+    y_step = model_config["grid_config"]["y"][2].as<float>();
+    z_start = model_config["grid_config"]["z"][0].as<float>();
+    z_end = model_config["grid_config"]["z"][1].as<float>();
+    z_step = model_config["grid_config"]["z"][2].as<float>();
+    bevpool_channel = model_config["model"]["bevpool_channels"].as<int>();
+    nms_pre_maxnum = model_config["test_cfg"]["max_per_img"].as<int>();
+    nms_post_maxnum = model_config["test_cfg"]["post_max_size"].as<int>();
+    score_thresh = model_config["test_cfg"]["score_threshold"].as<float>();
+    nms_overlap_thresh = model_config["test_cfg"]["nms_thr"][0].as<float>();
+    use_depth = model_config["use_depth"].as<bool>();
+    use_adj = model_config["use_adj"].as<bool>();
+    transform_size = model_config["transform_size"].as<int>();
+    cam_params_size = model_config["cam_params_size"].as<int>();
+
+    std::vector<std::vector<float>> nms_factor_temp = model_config["test_cfg"]
+                            ["nms_rescale_factor"].as<std::vector<std::vector<float>>>();
+    nms_rescale_factor.clear();
+    for(auto task_factors : nms_factor_temp){
+        for(float factor : task_factors){
+            nms_rescale_factor.push_back(factor);
+        }
+    }
+    
+    std::vector<std::vector<std::string>> class_name_pre_task;
+    class_num = 0;
+    YAML::Node tasks = model_config["model"]["tasks"];
+    class_num_pre_task = std::vector<int>();
+    for(auto it : tasks){
+        int num = it["num_class"].as<int>();
+        class_num_pre_task.push_back(num);
+        class_num += num;
+        class_name_pre_task.push_back(it["class_names"].as<std::vector<std::string>>());
+    }
+
+    YAML::Node common_head_channel = model_config["model"]["common_head"]["channels"];
+    YAML::Node common_head_name = model_config["model"]["common_head"]["names"];
+    for(size_t i = 0; i< common_head_channel.size(); i++){
+        out_num_task_head[common_head_name[i].as<std::string>()] = 
+                                                        common_head_channel[i].as<int>();
+    }
+
+    resize_radio = (float)input_img_w / src_img_w;
+    feat_h = input_img_h / down_sample;
+    feat_w = input_img_w / down_sample;
+    depth_num = (depth_end - depth_start) / depth_step;
+    xgrid_num = (x_end - x_start) / x_step;
+    ygrid_num = (y_end - y_start) / y_step;
+    zgrid_num = (z_end - z_start) / z_step;
+    bev_h = ygrid_num;
+    bev_w = xgrid_num;
+
+
+    post_rot << resize_radio, 0, 0,
+                0, resize_radio, 0,
+                0, 0, 1;
+    post_trans.translation() << -crop_w, -crop_h, 0;
+
+    adj_num = 0;
+    if(use_adj){
+        adj_num = model_config["adj_num"].as<int>();
+    }
+
+
+    postprocess_ptr.reset(new PostprocessGPU(class_num, score_thresh, nms_overlap_thresh,
+                                            nms_pre_maxnum, nms_post_maxnum, down_sample,
+                                            bev_h, bev_w, x_step, y_step, x_start,
+                                            y_start, class_num_pre_task, nms_rescale_factor));
+
+}
+
+void BEVDet::MallocDeviceMemory(){
+
+    trt_buffer_sizes.resize(trt_engine->getNbBindings());
+    trt_buffer_dev = (void**)new void*[trt_engine->getNbBindings()];
+    for(int i = 0; i < trt_engine->getNbBindings(); i++){
+        nvinfer1::Dims32 dim = trt_context->getBindingDimensions(i);
+        size_t size = 1;
+        for(int j = 0; j < dim.nbDims; j++){
+            size *= dim.d[j];
+        }
+        size *= dataTypeToSize(trt_engine->getBindingDataType(i));
+        trt_buffer_sizes[i] = size;
+        CHECK_CUDA(cudaMalloc(&trt_buffer_dev[i], size));
+    }
+
+    std::cout << "img num binding : " << trt_engine->getNbBindings() << std::endl;
+
+    post_buffer = (void**)new void*[class_num_pre_task.size() * 6];
+    for(size_t i = 0; i < class_num_pre_task.size(); i++){
+        post_buffer[i * 6 + 0] = trt_buffer_dev[buffer_map["reg_" + std::to_string(i)]];
+        post_buffer[i * 6 + 1] = trt_buffer_dev[buffer_map["height_" + std::to_string(i)]];
+        post_buffer[i * 6 + 2] = trt_buffer_dev[buffer_map["dim_" + std::to_string(i)]];
+        post_buffer[i * 6 + 3] = trt_buffer_dev[buffer_map["rot_" + std::to_string(i)]];
+        post_buffer[i * 6 + 4] = trt_buffer_dev[buffer_map["vel_" + std::to_string(i)]];
+        post_buffer[i * 6 + 5] = trt_buffer_dev[buffer_map["heatmap_" + std::to_string(i)]];
+    }
+
+    return;
+}
+
+
+void BEVDet::InitViewTransformer(std::shared_ptr<int> &ranks_bev_ptr, 
+                                 std::shared_ptr<int> &ranks_depth_ptr, 
+                                 std::shared_ptr<int> &ranks_feat_ptr, 
+                                 std::shared_ptr<int> &interval_starts_ptr, 
+                                 std::shared_ptr<int> &interval_lengths_ptr){
+
+    int num_points = N_img * depth_num * feat_h * feat_w;
+    Eigen::Vector3f* frustum = new Eigen::Vector3f[num_points];
+
+    for(int i = 0; i < N_img; i++){
+        for(int d_ = 0; d_ < depth_num; d_++){
+            for(int h_ = 0; h_ < feat_h; h_++){
+                for(int w_ = 0; w_ < feat_w; w_++){
+                    int offset = i * depth_num * feat_h * feat_w + d_ * feat_h * feat_w
+                                                                 + h_ * feat_w + w_;
+                    (frustum + offset)->x() = (float)w_ * (input_img_w - 1) / (feat_w - 1);
+                    (frustum + offset)->y() = (float)h_ * (input_img_h - 1) / (feat_h - 1);
+                    (frustum + offset)->z() = (float)d_ * depth_step + depth_start;
+
+                    // eliminate post transformation
+                    *(frustum + offset) -= post_trans.translation();
+                    *(frustum + offset) = post_rot.inverse() * *(frustum + offset);
+                    // 
+                    (frustum + offset)->x() *= (frustum + offset)->z();
+                    (frustum + offset)->y() *= (frustum + offset)->z();
+                    // img to ego -> rot -> trans
+                    *(frustum + offset) = cams2ego_rot[i] * cams_intrin[i].inverse()
+                                    * *(frustum + offset) + cams2ego_trans[i].translation();
+
+                    // voxelization
+                    *(frustum + offset) -= Eigen::Vector3f(x_start, y_start, z_start);
+                    (frustum + offset)->x() = (int)((frustum + offset)->x() / x_step);
+                    (frustum + offset)->y() = (int)((frustum + offset)->y() / y_step);
+                    (frustum + offset)->z() = (int)((frustum + offset)->z() / z_step);
+                }
+            }
+        }
+    }
+
+    int* _ranks_depth = new int[num_points];
+    int* _ranks_feat = new int[num_points];
+
+    for(int i = 0; i < num_points; i++){
+        _ranks_depth[i] = i;
+    }
+    for(int i = 0; i < N_img; i++){
+        for(int d_ = 0; d_ < depth_num; d_++){
+            for(int u = 0; u < feat_h * feat_w; u++){
+                int offset = i * (depth_num * feat_h * feat_w) + d_ * (feat_h * feat_w) + u;
+                _ranks_feat[offset] = i * feat_h * feat_w + u;
+            }
+        }
+    }
+
+    std::vector<int> kept;
+    for(int i = 0; i < num_points; i++){
+        if((int)(frustum + i)->x() >= 0 && (int)(frustum + i)->x() < xgrid_num &&
+           (int)(frustum + i)->y() >= 0 && (int)(frustum + i)->y() < ygrid_num &&
+           (int)(frustum + i)->z() >= 0 && (int)(frustum + i)->z() < zgrid_num){
+            kept.push_back(i);
+        }
+    }
+
+    valid_feat_num = kept.size();
+    int* ranks_depth_host = new int[valid_feat_num];
+    int* ranks_feat_host = new int[valid_feat_num];
+    int* ranks_bev_host = new int[valid_feat_num];
+    int* order = new int[valid_feat_num];
+
+    for(int i = 0; i < valid_feat_num; i++){
+        Eigen::Vector3f &p = frustum[kept[i]];
+        ranks_bev_host[i] = (int)p.z() * xgrid_num * ygrid_num + 
+                            (int)p.y() * xgrid_num + (int)p.x();
+        order[i] = i;
+    }
+
+    thrust::sort_by_key(ranks_bev_host, ranks_bev_host + valid_feat_num, order);
+    for(int i = 0; i < valid_feat_num; i++){
+        ranks_depth_host[i] = _ranks_depth[kept[order[i]]];
+        ranks_feat_host[i] = _ranks_feat[kept[order[i]]];
+    }
+
+    delete[] _ranks_depth;
+    delete[] _ranks_feat;
+    delete[] frustum;
+    delete[] order;
+
+    std::vector<int> interval_starts_host;
+    std::vector<int> interval_lengths_host;
+
+    interval_starts_host.push_back(0);
+    int len = 1;
+    for(int i = 1; i < valid_feat_num; i++){
+        if(ranks_bev_host[i] != ranks_bev_host[i - 1]){
+            interval_starts_host.push_back(i);
+            interval_lengths_host.push_back(len);
+            len=1;
+        }
+        else{
+            len++;
+        }
+    }
+    
+    interval_lengths_host.push_back(len);
+    unique_bev_num = interval_lengths_host.size();
+
+
+    int* interval_starts_host_ptr = new int[interval_starts_host.size()];
+    int* interval_lengths_host_ptr = new int[interval_lengths_host.size()];
+
+    memcpy(interval_starts_host_ptr, interval_starts_host.data(), 
+                                            interval_starts_host.size() * sizeof(int));
+    memcpy(interval_lengths_host_ptr, interval_lengths_host.data(), 
+                                            interval_lengths_host.size() * sizeof(int));
+
+    ranks_bev_ptr.reset(ranks_bev_host);
+    ranks_depth_ptr.reset(ranks_depth_host);
+    ranks_feat_ptr.reset(ranks_feat_host);
+    interval_starts_ptr.reset(interval_starts_host_ptr);
+    interval_lengths_ptr.reset(interval_lengths_host_ptr);
+
+    printf("valid_feat_num: %d \n", valid_feat_num);
+    printf("unique_bev_num: %d \n", unique_bev_num);
+}
+
+
+void print_dim(nvinfer1::Dims dim){
+    for(auto i = 0; i < dim.nbDims; i++){
+        printf("%d%c", dim.d[i], i == dim.nbDims - 1 ? '\n' : ' ');
+    }
+}
+
+int BEVDet::InitEngine(const std::string &engine_file){
+
+    if(DeserializeTRTEngine(engine_file, &trt_engine)){
+        return EXIT_FAILURE;
+    }
+
+    if(trt_engine == nullptr){
+        std::cerr << "Failed to deserialize engine file!" << std::endl;
+        return EXIT_FAILURE;
+    }
+    trt_context = trt_engine->createExecutionContext();
+
+    if (trt_context == nullptr) {
+        std::cerr << "Failed to create TensorRT Execution Context!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // set bindings
+    std::vector<nvinfer1::Dims32> shapes{
+        {4, {N_img, 3, src_img_h, src_img_w  / 4}},
+        {1, {3}},
+        {1, {3}},
+        {3, {1, N_img, cam_params_size}},
+        {1, {valid_feat_num}},
+        {1, {valid_feat_num}},
+        {1, {valid_feat_num}},
+        {1, {unique_bev_num}},
+        {1, {unique_bev_num}},
+        {5, {1, adj_num, bevpool_channel, bev_h, bev_w}},
+        {3, {1, adj_num, transform_size}},
+        {2, {1, 1}}
+    };
+
+
+
+    for(size_t i = 0; i < shapes.size(); i++){
+         trt_context->setBindingDimensions(i, shapes[i]);
+    }
+
+    buffer_map.clear();
+    for(auto i = 0; i < trt_engine->getNbBindings(); i++){
+        auto dim = trt_context->getBindingDimensions(i);
+        auto name = trt_engine->getBindingName(i);
+        buffer_map[name] = i;
+        std::cout << name << " : ";
+        print_dim(dim);
+    }
+    std::cout << std::endl;
+
+    return EXIT_SUCCESS;
+}
+void save_tensor(size_t size, const void * ptr, const std::string& file){
+    float* tensor = new float[size];
+    CHECK_CUDA(cudaMemcpy(tensor, ptr, size * sizeof(float), cudaMemcpyDeviceToHost));
+    std::ofstream out(file, std::ios::out | std::ios::binary);
+    out.write((char*)tensor, size * sizeof(float));
+    out.close();
+    delete[] tensor;
+}
+
+
+int BEVDet::DeserializeTRTEngine(const std::string &engine_file, 
+                                 nvinfer1::ICudaEngine **engine_ptr){
+    int verbosity = static_cast<int>(nvinfer1::ILogger::Severity::kWARNING);
+    std::stringstream engine_stream;
+    engine_stream.seekg(0, engine_stream.beg);
+
+    std::ifstream file(engine_file);
+    engine_stream << file.rdbuf();
+    file.close();
+
+    nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(g_logger);
+    if (runtime == nullptr) {
+        std::string msg("Failed to build runtime parser!");
+        g_logger.log(nvinfer1::ILogger::Severity::kERROR, msg.c_str());
+        return EXIT_FAILURE;
+    }
+    engine_stream.seekg(0, std::ios::end);
+    const int engine_size = engine_stream.tellg();
+
+    engine_stream.seekg(0, std::ios::beg); 
+    void* engine_str = malloc(engine_size);
+    engine_stream.read((char*)engine_str, engine_size);
+    
+    nvinfer1::ICudaEngine *engine = runtime->deserializeCudaEngine(engine_str, engine_size, NULL);
+    if (engine == nullptr) {
+        std::string msg("Failed to build engine parser!");
+        g_logger.log(nvinfer1::ILogger::Severity::kERROR, msg.c_str());
+        return EXIT_FAILURE;
+    }
+    *engine_ptr = engine;
+    for (int bi = 0; bi < engine->getNbBindings(); bi++) {
+        if (engine->bindingIsInput(bi) == true){
+            printf("Binding %d (%s): Input. \n", bi, engine->getBindingName(bi));
+        }
+        else{
+            printf("Binding %d (%s): Output. \n", bi, engine->getBindingName(bi));
+        }
+    }
+    return EXIT_SUCCESS;
+}
+
+
+void BEVDet::GetAdjBEVFeature(const std::string &curr_scene_token, 
+                         const Eigen::Quaternion<float> &ego2global_rot,
+                         const Eigen::Translation3f &ego2global_trans) {
+
+    int flag = 1;
+    if(adj_frame_ptr->lastScenesToken() != curr_scene_token){
+        adj_frame_ptr->reset();
+        flag = 0;
+    }
+
+    // idx越小, adj_bevfeat越新 
+    for(int i = 0; i < adj_num; i++){
+        const void* adj_buffer = adj_frame_ptr->getFrameBuffer(i);
+
+        size_t buf_size = trt_buffer_sizes[buffer_map["adj_feats"]] / adj_num;
+
+        CHECK_CUDA(cudaMemcpy((char*)trt_buffer_dev[buffer_map["adj_feats"]] + i * buf_size,
+                                adj_buffer, buf_size, cudaMemcpyDeviceToDevice));
+
+        Eigen::Quaternion<float> adj_ego2global_rot;
+        Eigen::Translation3f adj_ego2global_trans;
+        adj_frame_ptr->getEgo2Global(i, adj_ego2global_rot, adj_ego2global_trans);
+        
+        GetCurr2AdjTransform(ego2global_rot, 
+                            adj_ego2global_rot, 
+                            ego2global_trans,
+                            adj_ego2global_trans, 
+                            (float *)trt_buffer_dev[buffer_map["transforms"]] + i * transform_size);
+    }
+    CHECK_CUDA(cudaMemcpy(trt_buffer_dev[buffer_map["flag"]], &flag, 
+                        trt_buffer_sizes[buffer_map["flag"]], cudaMemcpyHostToDevice));
+
+}
+
+
+void BEVDet::GetCurr2AdjTransform(const Eigen::Quaternion<float> &curr_ego2global_rot,
+                                const Eigen::Quaternion<float> &adj_ego2global_rot,
+                                const Eigen::Translation3f &curr_ego2global_trans,
+                                const Eigen::Translation3f &adj_ego2global_trans,
+                                float* transform_dev){
+    Eigen::Matrix4f curr_e2g_transform;
+    Eigen::Matrix4f adj_e2g_transform;
+
+    for(int i = 0; i < 3; i++){
+        for(int j = 0; j < 3; j++){
+            curr_e2g_transform(i, j) = curr_ego2global_rot.matrix()(i, j);
+            adj_e2g_transform(i, j) = adj_ego2global_rot.matrix()(i, j);
+        }
+    }
+    for(int i = 0; i < 3; i++){
+        curr_e2g_transform(i, 3) = curr_ego2global_trans.vector()(i);
+        adj_e2g_transform(i, 3) = adj_ego2global_trans.vector()(i);
+
+        curr_e2g_transform(3, i) = 0.f;
+        adj_e2g_transform(3, i) = 0.f;
+    }
+    curr_e2g_transform(3, 3) = 1.f;
+    adj_e2g_transform(3, 3) = 1.f;
+
+    Eigen::Matrix4f currEgo2adjEgo = adj_e2g_transform.inverse() * curr_e2g_transform;
+    Eigen::Matrix3f currEgo2adjEgo_2d;
+    for(int i = 0; i < 2; i++){
+        for(int j = 0; j < 2; j++){
+            currEgo2adjEgo_2d(i, j) = currEgo2adjEgo(i, j);
+        }
+    }
+    currEgo2adjEgo_2d(2, 0) = 0.f;
+    currEgo2adjEgo_2d(2, 1) = 0.f;
+    currEgo2adjEgo_2d(2, 2) = 1.f;
+    currEgo2adjEgo_2d(0, 2) = currEgo2adjEgo(0, 3);
+    currEgo2adjEgo_2d(1, 2) = currEgo2adjEgo(1, 3);
+
+    Eigen::Matrix3f gridbev2egobev;
+    gridbev2egobev(0, 0) = x_step;
+    gridbev2egobev(1, 1) = y_step;
+    gridbev2egobev(0, 2) = x_start;
+    gridbev2egobev(1, 2) = y_start;
+    gridbev2egobev(2, 2) = 1.f;
+
+    gridbev2egobev(0, 1) = 0.f;
+    gridbev2egobev(1, 0) = 0.f;
+    gridbev2egobev(2, 0) = 0.f;
+    gridbev2egobev(2, 1) = 0.f;
+
+    Eigen::Matrix3f currgrid2adjgrid = gridbev2egobev.inverse() * currEgo2adjEgo_2d * gridbev2egobev;
+
+    CHECK_CUDA(cudaMemcpy(transform_dev, Eigen::Matrix3f(currgrid2adjgrid.transpose()).data(),
+                                    transform_size * sizeof(float), cudaMemcpyHostToDevice));
+
+}
+
+
+
+int BEVDet::DoInfer(const camsData& cam_data, std::vector<Box> &out_detections, float &cost_time,
+                                                                                    int idx){
+    
+    printf("-------------------%d-------------------\n", idx + 1);
+
+    printf("scenes_token : %s, timestamp : %lld\n", cam_data.param.scene_token.data(), 
+                                cam_data.param.timestamp);
+
+    auto start = high_resolution_clock::now();
+    CHECK_CUDA(cudaMemcpy(trt_buffer_dev[buffer_map["images"]], cam_data.imgs_dev, 
+                trt_buffer_sizes[buffer_map["images"]], cudaMemcpyDeviceToDevice));
+
+    InitCamParams(cam_data.param.cams2ego_rot, 
+                cam_data.param.cams2ego_trans, 
+                cam_data.param.cams_intrin);
+
+    GetAdjBEVFeature(cam_data.param.scene_token, 
+                    cam_data.param.ego2global_rot, 
+                    cam_data.param.ego2global_trans);
+
+    if(!trt_context->executeV2(trt_buffer_dev)){ 
+        printf("BEVDet forward failing!\n");
+    }
+
+    adj_frame_ptr->saveFrameBuffer(trt_buffer_dev[buffer_map["curr_bevfeat"]],
+                                    cam_data.param.scene_token, 
+                                    cam_data.param.ego2global_rot, 
+                                    cam_data.param.ego2global_trans);
+
+
+    auto end = high_resolution_clock::now();
+
+    postprocess_ptr->DoPostprocess(post_buffer, out_detections);
+    CHECK_CUDA(cudaDeviceSynchronize());
+
+    auto post_end = high_resolution_clock::now();
+
+    duration<double> infer_t = post_end - start;
+    duration<double> engine_t = end - start;
+    duration<double> post_t = post_end - end;
+
+    cost_time = infer_t.count() * 1000;
+    printf("TRT-Engine  : %.5lf ms\n", engine_t.count() * 1000);
+    printf("Postprocess : %.5lf ms\n", post_t.count() * 1000);
+    printf("Inference   : %.5lf ms\n", infer_t.count() * 1000);
+
+    printf("Detect %ld objects\n", out_detections.size());
+
+    return EXIT_SUCCESS;
+}
+
+void BEVDet::ExportEngine(const std::string &onnxFile, const std::string &trtFile){
+    CHECK_CUDA(cudaSetDevice(0));
+    nvinfer1::ICudaEngine *engine = nullptr;
+    std::vector<nvinfer1::Dims32> min_shapes{
+    {4, {6, 3, 900, 400}},
+    {1, {3}},
+    {1, {3}},
+    {3, {1, 6, 27}},
+    {1, {200000}},
+    {1, {200000}},
+    {1, {200000}},
+    {1, {8000}},
+    {1, {8000}},
+    {5, {1, 8, 80, 128, 128}},
+    {3, {1, 8, 6}},
+    {2, {1, 1}}
+    };
+
+    std::vector<nvinfer1::Dims32> opt_shapes{
+        {4, {6, 3, 900, 400}},
+        {1, {3}},
+        {1, {3}},
+        {3, {1, 6, 27}},
+        {1, {356760}},
+        {1, {356760}},
+        {1, {356760}},
+        {1, {13360}},
+        {1, {13360}},
+        {5, {1, 8, 80, 128, 128}},
+        {3, {1, 8, 6}},
+        {2, {1, 1}}
+    };
+
+
+    std::vector<nvinfer1::Dims32> max_shapes{
+        {4, {6, 3, 900, 400}},
+        {1, {3}},
+        {1, {3}},
+        {3, {1, 6, 27}},
+        {1, {370000}},
+        {1, {370000}},
+        {1, {370000}},
+        {1, {14000}},
+        {1, {14000}},
+        {5, {1, 8, 80, 128, 128}},
+        {3, {1, 8, 6}},
+        {2, {1, 1}}
+    };
+    nvinfer1::IBuilder *            builder = nvinfer1::createInferBuilder(g_logger);
+    nvinfer1::INetworkDefinition *  network = builder->createNetworkV2(1U << int(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH));
+    nvinfer1::IOptimizationProfile *profile = builder->createOptimizationProfile();
+    nvinfer1::IBuilderConfig *      config  = builder->createBuilderConfig();
+    config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, 3UL << 32UL);
+        
+    config->setFlag(nvinfer1::BuilderFlag::kFP16);
+
+    nvonnxparser::IParser *parser = nvonnxparser::createParser(*network, g_logger);
+
+
+    if (!parser->parseFromFile(onnxFile.c_str(), int(g_logger.reportable_severity)))
+    {
+        std::cout << std::string("Failed parsing .onnx file!") << std::endl;
+        for (int i = 0; i < parser->getNbErrors(); ++i)
+        {
+            auto *error = parser->getError(i);
+            std::cout << std::to_string(int(error->code())) << std::string(":") << std::string(error->desc()) << std::endl;
+        }
+
+        return;
+    }
+    std::cout << std::string("Succeeded parsing .onnx file!") << std::endl;
+
+    for(size_t i = 0; i < min_shapes.size(); i++){
+        nvinfer1::ITensor *it = network->getInput(i);
+        profile->setDimensions(it->getName(), nvinfer1::OptProfileSelector::kMIN, min_shapes[i]);
+        profile->setDimensions(it->getName(), nvinfer1::OptProfileSelector::kOPT, opt_shapes[i]);
+        profile->setDimensions(it->getName(), nvinfer1::OptProfileSelector::kMAX, max_shapes[i]);
+    }
+    config->addOptimizationProfile(profile);
+
+    nvinfer1::IHostMemory *engineString = builder->buildSerializedNetwork(*network, *config);
+    if (engineString == nullptr || engineString->size() == 0)
+    {
+        std::cout << "Failed building serialized engine!" << std::endl;
+        return;
+    }
+    std::cout << "Succeeded building serialized engine!" << std::endl;
+
+    nvinfer1::IRuntime *runtime {nvinfer1::createInferRuntime(g_logger)};
+    engine = runtime->deserializeCudaEngine(engineString->data(), engineString->size());
+    if (engine == nullptr)
+    {
+        std::cout << "Failed building engine!" << std::endl;
+        return;
+    }
+    std::cout << "Succeeded building engine!" << std::endl;
+
+    std::ofstream engineFile(trtFile, std::ios::binary);
+    if (!engineFile)
+    {
+        std::cout << "Failed opening file to write" << std::endl;
+        return;
+    }
+    engineFile.write(static_cast<char *>(engineString->data()), engineString->size());
+    if (engineFile.fail())
+    {
+        std::cout << "Failed saving .engine file!" << std::endl;
+        return;
+    }
+    std::cout << "Succeeded saving .engine file!" << std::endl;
+}
+
+BEVDet::~BEVDet(){
+
+    for(int i = 0; i < trt_engine->getNbBindings(); i++){
+        CHECK_CUDA(cudaFree(trt_buffer_dev[i]));
+    }
+    delete[] trt_buffer_dev;
+    delete[] post_buffer;
+
+    delete[] cam_params_host;
+
+    trt_context->destroy();
+    trt_engine->destroy();
+}
\ No newline at end of file
diff --git a/perception/tensorrt_bevdet/src/bevdet_node.cpp b/perception/tensorrt_bevdet/src/bevdet_node.cpp
new file mode 100644
index 0000000000000..f7c34d65f123f
--- /dev/null
+++ b/perception/tensorrt_bevdet/src/bevdet_node.cpp
@@ -0,0 +1,259 @@
+#include "bevdet_node.hpp"
+
+using Label = autoware_perception_msgs::msg::ObjectClassification;
+std::map< int, std::vector<int>> colormap { 
+            {0, {0, 0, 255}},  // dodger blue 
+            {1, {0, 201, 87}},   
+            {2, {0, 201, 87}},
+            {3, {160, 32, 240}},
+            {4, {3, 168, 158}},
+            {5, {255, 0, 0}},
+            {6, {255, 97, 0}},
+            {7, {30,  0, 255}},
+            {8, {255, 0, 0}},
+            {9, {0, 0, 255}},
+            {10, {0, 0, 0}}
+};
+
+uint8_t getSemanticType(const std::string & class_name)
+{
+  if (class_name == "car") {
+    return Label::CAR;
+  } else if (class_name == "truck") {
+    return Label::TRUCK;
+  } else if (class_name == "bus") {
+    return Label::BUS;
+  } else if (class_name == "trailer") {
+    return Label::TRAILER;
+  } else if (class_name == "bicycle") {
+    return Label::BICYCLE;
+  } else if (class_name == "motorcycle") {
+    return Label::MOTORCYCLE;
+  } else if (class_name == "pedestrian") {
+    return Label::PEDESTRIAN;
+  } else {
+    return Label::UNKNOWN;
+  }
+}
+
+void Getinfo(void) 
+{
+    cudaDeviceProp prop;
+
+    int count = 0;
+    cudaGetDeviceCount(&count);
+    printf("\nGPU has cuda devices: %d\n", count);
+    for (int i = 0; i < count; ++i) {
+        cudaGetDeviceProperties(&prop, i);
+        printf("----device id: %d info----\n", i);
+        printf("  GPU : %s \n", prop.name);
+        printf("  Capbility: %d.%d\n", prop.major, prop.minor);
+        printf("  Global memory: %luMB\n", prop.totalGlobalMem >> 20);
+        printf("  Const memory: %luKB\n", prop.totalConstMem >> 10);
+        printf("  Shared memory in a block: %luKB\n", prop.sharedMemPerBlock >> 10);
+        printf("  warp size: %d\n", prop.warpSize);
+        printf("  threads in a block: %d\n", prop.maxThreadsPerBlock);
+        printf("  block dim: (%d,%d,%d)\n", prop.maxThreadsDim[0],
+                prop.maxThreadsDim[1], prop.maxThreadsDim[2]);
+        printf("  grid dim: (%d,%d,%d)\n", prop.maxGridSize[0], prop.maxGridSize[1],
+                prop.maxGridSize[2]);
+    }
+    printf("\n");
+}
+
+void box3DToDetectedObjects(const std::vector<Box>& boxes, 
+                    autoware_perception_msgs::msg::DetectedObjects & bevdet_objects,
+                    const std::vector<std::string> & class_names,
+                    float score_thre, const bool has_twist=true)
+{
+    for(auto b : boxes)
+    {   
+        if(b.score < score_thre)
+            continue;
+        autoware_perception_msgs::msg::DetectedObject obj;
+
+        Eigen::Vector3f center(b.x, b.y, b.z + b.h/2.);
+
+        obj.existence_probability = b.score;
+        // classification
+        autoware_perception_msgs::msg::ObjectClassification classification;
+        classification.probability = 1.0f;
+        if (b.label >= 0 && static_cast<size_t>(b.label) < class_names.size()) {
+            classification.label = getSemanticType(class_names[b.label]);
+        } else {
+            classification.label = Label::UNKNOWN;
+        }
+        obj.classification.emplace_back(classification);
+
+        // pose and shape
+        geometry_msgs::msg::Point p;
+        p.x = center.x();
+        p.y = center.y();
+        p.z = center.z();
+        obj.kinematics.pose_with_covariance.pose.position = p;
+
+        tf2::Quaternion q;
+        q.setRPY(0, 0, b.r);
+        obj.kinematics.pose_with_covariance.pose.orientation = tf2::toMsg(q);
+
+        obj.shape.type = autoware_perception_msgs::msg::Shape::BOUNDING_BOX;
+
+        geometry_msgs::msg::Vector3 v;
+        v.x = b.l;
+        v.y = b.w;
+        v.z = b.h;
+        obj.shape.dimensions = v;
+        if (has_twist) {
+          float vel_x = b.vx;
+          float vel_y = b.vy;
+          geometry_msgs::msg::Twist twist;
+          twist.linear.x = std::sqrt(std::pow(vel_x, 2) + std::pow(vel_y, 2));
+          twist.angular.z = 2 * (std::atan2(vel_y, vel_x) - b.r);
+          obj.kinematics.twist_with_covariance.twist = twist;
+          obj.kinematics.has_twist = has_twist;
+        }
+
+        bevdet_objects.objects.emplace_back(obj);
+
+    }
+}
+
+TRTBEVDetNode::TRTBEVDetNode(const std::string & node_name, const rclcpp::NodeOptions & node_options)
+: rclcpp::Node(node_name, node_options)
+{
+    using std::placeholders::_1;
+    using std::placeholders::_2;
+    using std::placeholders::_3;
+    using std::placeholders::_4;
+    using std::placeholders::_5;
+    using std::placeholders::_6;
+    using std::placeholders::_7;
+
+    score_thre_ = this->declare_parameter<float>("post_process_params.score_thre", 0.2);
+
+    img_N_ = this->declare_parameter<int>("data_params.N", 6); // camera num 6
+    img_w_ = this->declare_parameter<int>("data_params.W", 1600); // W: 1600
+    img_h_ = this->declare_parameter<int>("data_params.H", 900); // H: 900
+     
+    model_config_ = this->declare_parameter("model_config", "bevdet_r50_4dlongterm_depth.yaml");
+    
+    onnx_file_ = this->declare_parameter("onnx_file", "bevdet_one_lt_d.onnx");
+    engine_file_ = this->declare_parameter("engine_file", "bevdet_one_lt_d.engine");
+    
+    camconfig_ = YAML::LoadFile(this->declare_parameter("cam_config", "cams_param.yaml")); 
+    
+    imgs_name_ = this->declare_parameter<std::vector<std::string>>("data_params.cams");
+    class_names_ = this->declare_parameter<std::vector<std::string>>("post_process_params.class_names");
+  
+    RCLCPP_INFO_STREAM(this->get_logger(), "Successful load config!");
+    
+    sampleData_.param = camParams(camconfig_, img_N_, imgs_name_);
+
+    RCLCPP_INFO_STREAM(this->get_logger(), "Successful load image params!");
+
+    bevdet_ = std::make_shared<BEVDet>(model_config_, img_N_, sampleData_.param.cams_intrin, 
+                sampleData_.param.cams2ego_rot, sampleData_.param.cams2ego_trans, onnx_file_, 
+                                                    engine_file_);
+    
+    RCLCPP_INFO_STREAM(this->get_logger(), "Successful create bevdet!");
+    
+    CHECK_CUDA(cudaMalloc((void**)&imgs_dev_, img_N_ * 3 * img_w_ * img_h_ * sizeof(uchar)));
+
+    pub_cloud_ = this ->create_publisher<sensor_msgs::msg::PointCloud2>("~/output/painting_cloud", rclcpp::SensorDataQoS()); 
+    pub_boxes_ = this ->create_publisher<autoware_perception_msgs::msg::DetectedObjects>("~/output/boxes", rclcpp::QoS{1});   
+    
+    sub_cloud_.subscribe(this, "~/input/topic_cloud", rclcpp::QoS{1}.get_rmw_qos_profile());//rclcpp::QoS{1}.get_rmw_qos_profile()
+    sub_f_img_.subscribe(this, "~/input/topic_img_f", rclcpp::QoS{1}.get_rmw_qos_profile());//rmw_qos_profile_sensor_data
+    sub_b_img_.subscribe(this, "~/input/topic_img_b", rclcpp::QoS{1}.get_rmw_qos_profile());
+
+    sub_fl_img_.subscribe(this, "~/input/topic_img_fl", rclcpp::QoS{1}.get_rmw_qos_profile());
+    sub_fr_img_.subscribe(this, "~/input/topic_img_fr", rclcpp::QoS{1}.get_rmw_qos_profile());
+    
+    sub_bl_img_.subscribe(this, "~/input/topic_img_bl", rclcpp::QoS{1}.get_rmw_qos_profile());
+    sub_br_img_.subscribe(this, "~/input/topic_img_br", rclcpp::QoS{1}.get_rmw_qos_profile());
+  
+    sync_ = std::make_shared<Sync>( MySyncPolicy(10), sub_cloud_, 
+    sub_fl_img_, sub_f_img_, sub_fr_img_,
+    sub_bl_img_ ,sub_b_img_, sub_br_img_); 
+  
+    sync_->registerCallback(std::bind(&TRTBEVDetNode::callback,this, _1, _2, _3, _4, _5, _6,_7)); // 绑定回调函数
+  
+}
+
+
+void TRTBEVDetNode::callback(const sensor_msgs::msg::PointCloud2::ConstSharedPtr & msg_cloud, 
+    const sensor_msgs::msg::Image::ConstSharedPtr & msg_fl_img,
+    const sensor_msgs::msg::Image::ConstSharedPtr & msg_f_img,
+    const sensor_msgs::msg::Image::ConstSharedPtr & msg_fr_img,
+    const sensor_msgs::msg::Image::ConstSharedPtr & msg_bl_img,
+    const sensor_msgs::msg::Image::ConstSharedPtr & msg_b_img,
+    const sensor_msgs::msg::Image::ConstSharedPtr & msg_br_img)
+{   
+    
+    pcl::PointCloud<PointT>::Ptr cloud(new pcl::PointCloud<PointT>);
+    
+    pcl::fromROSMsg(*msg_cloud, *cloud);
+   
+    cv::Mat img_fl, img_f, img_fr, img_bl, img_b, img_br;
+    std::vector<cv::Mat> imgs;
+    img_fl = cv_bridge::toCvShare(msg_fl_img , "bgr8")->image;
+    img_f  = cv_bridge::toCvShare(msg_f_img, "bgr8")->image;
+    img_fr = cv_bridge::toCvShare(msg_fr_img, "bgr8")->image;
+    img_bl = cv_bridge::toCvShare(msg_bl_img , "bgr8")->image;
+    img_b  = cv_bridge::toCvShare(msg_b_img, "bgr8")->image;
+    img_br = cv_bridge::toCvShare(msg_br_img, "bgr8")->image;
+
+    imgs.emplace_back(img_fl);
+    imgs.emplace_back(img_f);
+    imgs.emplace_back(img_fr);
+    imgs.emplace_back(img_bl);
+    imgs.emplace_back(img_b);
+    imgs.emplace_back(img_br);
+
+    std::vector<std::vector<char>> imgs_data;
+    // Mat -> Vetor<char>  
+    cvImgToArr(imgs, imgs_data);
+    
+    // cpu imgs_data -> gpu imgs_dev
+    decode_cpu(imgs_data, imgs_dev_, img_w_, img_h_);
+
+    // uchar *imgs_dev
+    sampleData_.imgs_dev = imgs_dev_;
+
+    std::vector<Box> ego_boxes;
+    ego_boxes.clear();
+    float time = 0.f;
+    
+    bevdet_->DoInfer(sampleData_, ego_boxes, time);
+    
+    autoware_perception_msgs::msg::DetectedObjects bevdet_objects;
+    bevdet_objects.header.frame_id = "base_link";
+    bevdet_objects.header.stamp = msg_cloud->header.stamp;
+    
+    box3DToDetectedObjects(ego_boxes, bevdet_objects, class_names_, score_thre_);
+    
+    pub_boxes_ -> publish(bevdet_objects);
+    
+    sensor_msgs::msg::PointCloud2 msg_cloud_painting;
+    pcl::toROSMsg(*cloud, msg_cloud_painting);
+
+    msg_cloud_painting.header.frame_id = msg_cloud->header.frame_id;
+    msg_cloud_painting.header.stamp = msg_cloud->header.stamp;
+    pub_cloud_ -> publish(msg_cloud_painting);
+}
+
+
+TRTBEVDetNode::~TRTBEVDetNode()
+{
+    delete imgs_dev_;
+}
+
+
+int main(int argc, char **argv)
+{   
+    rclcpp::init(argc, argv);
+    rclcpp::NodeOptions node_options;
+    auto bevdet_node = std::make_shared<TRTBEVDetNode>("tensorrt_bevdet_node", node_options);
+    rclcpp::spin(bevdet_node);
+    return 0;
+}
\ No newline at end of file
diff --git a/perception/tensorrt_bevdet/src/bevpool_plugin.cu b/perception/tensorrt_bevdet/src/bevpool_plugin.cu
new file mode 100644
index 0000000000000..8654b6c3bf688
--- /dev/null
+++ b/perception/tensorrt_bevdet/src/bevpool_plugin.cu
@@ -0,0 +1,372 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "bevpool_plugin.hpp"
+#include "common.hpp"
+#include <cuda_fp16.h>
+#include <cuda_runtime_api.h>
+
+// input[0] depth            b*n x d x h x w
+// input[1] feat             b*n x c x h x w
+// input[2] ranks_depth      m
+// input[3] ranks_feat       m
+// input[4] ranks_bev        m
+// input[5] interval_starts  k
+// input[6] interval_lengths k
+
+// out[0]   bevfeat          b x c x h x w    
+
+template<typename T1, typename T2>
+__global__ void bev_pool_v2_kernel(int channel, 
+                                    int n_intervals,
+                                    int map_size,
+                                    int img_size,
+                                    const T1 *__restrict__ depth,
+                                    const T1 *__restrict__ feat,
+                                    const int *__restrict__ ranks_depth,
+                                    const int *__restrict__ ranks_feat,
+                                    const int *__restrict__ ranks_bev,
+                                    const int *__restrict__ interval_starts,
+                                    const int *__restrict__ interval_lengths,
+                                    T2 * __restrict__ out) {
+    CUDA_1D_KERNEL_LOOP(idx, n_intervals * channel){
+        int index = idx / channel;    // bev grid index
+        int curr_c = idx % channel;    // channel index
+        int interval_start = interval_starts[index];  
+        int interval_length = interval_lengths[index];  
+
+        int curr_step = curr_c * img_size;
+        int chan_step = channel * img_size;
+
+        T2 sum = 0;
+
+        int feat_offset = 0;
+        for(int i = 0; i < interval_length; i++){
+            feat_offset = ranks_feat[interval_start + i] / img_size * chan_step + 
+                          curr_step + ranks_feat[interval_start + i] % img_size;
+  
+            sum += static_cast<T2>(feat[feat_offset]) * static_cast<T2>(depth[ranks_depth[interval_start + i]]);
+        }
+        out[curr_c * map_size + ranks_bev[interval_start]] = sum;
+    }
+}
+
+namespace nvinfer1
+{
+// class BEVPoolPlugin
+BEVPoolPlugin::BEVPoolPlugin(const std::string &name, int bev_h, int bev_w, int n):
+    name_(name){
+    m_.bev_h = bev_h;
+    m_.bev_w = bev_w;
+    m_.n = n;
+}
+
+BEVPoolPlugin::BEVPoolPlugin(const std::string &name, const void *buffer, size_t length):
+    name_(name){
+    memcpy(&m_, buffer, sizeof(m_));
+}
+
+BEVPoolPlugin::~BEVPoolPlugin(){
+}
+
+IPluginV2DynamicExt *BEVPoolPlugin::clone() const noexcept {
+    auto p = new BEVPoolPlugin(name_, &m_, sizeof(m_));
+    p->setPluginNamespace(namespace_.c_str());
+    return p;
+}
+
+int32_t BEVPoolPlugin::getNbOutputs() const noexcept {
+    return 1;
+}
+ 
+DataType BEVPoolPlugin::getOutputDataType(int32_t index, DataType const *inputTypes, 
+                                                                int32_t nbInputs) const noexcept {
+    return DataType::kFLOAT;
+}
+
+DimsExprs BEVPoolPlugin::getOutputDimensions(int32_t outputIndex, const DimsExprs *inputs, 
+                                        int32_t nbInputs, IExprBuilder &exprBuilder) noexcept {
+    // input[0] depth            b*n x d x h x w
+    // input[1] feat             b*n x c x h x w
+
+    // input[2] ranks_depth      m
+    // input[3] ranks_feat       m
+    // input[4] ranks_bev        m
+    // input[5] interval_starts  k
+    // input[6] interval_lengths k
+
+    // out[0]   bevfeat          b x c x h x w    
+
+    DimsExprs ret;
+    ret.nbDims = inputs[1].nbDims;
+    ret.d[0] = exprBuilder.operation(DimensionOperation::kFLOOR_DIV, *inputs[1].d[0], *exprBuilder.constant(m_.n)); 
+    ret.d[1] = inputs[1].d[1];
+    ret.d[2] = exprBuilder.constant(m_.bev_h);
+    ret.d[3] = exprBuilder.constant(m_.bev_w);
+    
+    return ret; 
+}
+
+bool BEVPoolPlugin::supportsFormatCombination(int32_t pos, const PluginTensorDesc *inOut,
+                                                    int32_t nbInputs, int32_t nbOutputs) noexcept {
+    // depth
+    if(pos == 0){
+        return (inOut[pos].type == DataType::kFLOAT || inOut[pos].type == DataType::kHALF) &&
+                 inOut[pos].format == TensorFormat::kLINEAR;
+    }
+    else if(pos == 1){ // feat
+        return inOut[0].type == inOut[1].type && inOut[pos].format == TensorFormat::kLINEAR;
+    }
+    else if(pos == 7){ // out
+        return (inOut[pos].type == DataType::kFLOAT || inOut[pos].type == DataType::kHALF) &&
+                inOut[pos].format == TensorFormat::kLINEAR;
+    }
+    else{
+        return inOut[pos].type == DataType::kINT32 && inOut[pos].format == TensorFormat::kLINEAR;
+    }
+    return false;
+}
+
+void BEVPoolPlugin::configurePlugin(const DynamicPluginTensorDesc *in, int32_t nbInputs, 
+                                    const DynamicPluginTensorDesc *out, int32_t nbOutputs) noexcept {
+    return;
+}
+
+size_t BEVPoolPlugin::getWorkspaceSize(const PluginTensorDesc *inputs, int32_t nbInputs, 
+                                const PluginTensorDesc *outputs, int32_t nbOutputs) const noexcept {
+    return 0;
+}
+
+int32_t BEVPoolPlugin::enqueue(const PluginTensorDesc *inputDesc, const PluginTensorDesc *outputDesc,
+    const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept {
+    // input[0] == depth            b*n x d x h x w
+    // input[1] == feat             b*n x c x h x w
+    // input[2] == ranks_depth      m
+    // input[3] == ranks_feat       m
+    // input[4] == ranks_bev        m
+    // input[5] == interval_starts  k
+    // input[6] == interval_lengths k
+
+    int channel = inputDesc[1].dims.d[1];
+    int n_intervals = inputDesc[5].dims.d[0];
+    int map_size = m_.bev_h * m_.bev_w;
+    int img_size = inputDesc[0].dims.d[2] * inputDesc[0].dims.d[3];
+
+    dim3 grid(GET_BLOCKS(n_intervals * channel));
+    dim3 block(NUM_THREADS);
+
+    // printf("BEVPool input depth %s\n", dataTypeToString(inputDesc[0].type).c_str());
+    // printf("BEVPool input  feat %s\n", dataTypeToString(inputDesc[1].type).c_str());
+    // printf("BEVPool output feat %s\n", dataTypeToString(outputDesc[0].type).c_str());
+
+    switch (int(outputDesc[0].type))
+    {
+    case int(DataType::kFLOAT):
+        if(inputDesc[0].type == DataType::kFLOAT){
+            // printf("bevpool : fp32 fp32\n");
+            bev_pool_v2_kernel<float, float><<<grid, block, 0, stream>>>(
+                                                        channel, 
+                                                        n_intervals,
+                                                        map_size,
+                                                        img_size,
+                                                        reinterpret_cast<const float *>(inputs[0]),
+                                                        reinterpret_cast<const float *>(inputs[1]),
+                                                        reinterpret_cast<const int *>(inputs[2]),
+                                                        reinterpret_cast<const int *>(inputs[3]),
+                                                        reinterpret_cast<const int *>(inputs[4]),
+                                                        reinterpret_cast<const int *>(inputs[5]),
+                                                        reinterpret_cast<const int *>(inputs[6]),
+                                                        reinterpret_cast<float *>(outputs[0]));
+        }
+        else{
+            // printf("bevpool : fp16 fp32\n");
+            bev_pool_v2_kernel<__half, float><<<grid, block, 0, stream>>>(
+                                                        channel, 
+                                                        n_intervals,
+                                                        map_size,
+                                                        img_size,
+                                                        reinterpret_cast<const __half *>(inputs[0]),
+                                                        reinterpret_cast<const __half *>(inputs[1]),
+                                                        reinterpret_cast<const int *>(inputs[2]),
+                                                        reinterpret_cast<const int *>(inputs[3]),
+                                                        reinterpret_cast<const int *>(inputs[4]),
+                                                        reinterpret_cast<const int *>(inputs[5]),
+                                                        reinterpret_cast<const int *>(inputs[6]),
+                                                        reinterpret_cast<float *>(outputs[0]));
+        }
+        break;
+    case int(DataType::kHALF):
+        if(inputDesc[0].type == DataType::kFLOAT){
+            // printf("bevpool : fp32 fp16\n");
+            bev_pool_v2_kernel<float, __half><<<grid, block, 0, stream>>>(
+                                                        channel, 
+                                                        n_intervals,
+                                                        map_size,
+                                                        img_size,
+                                                        reinterpret_cast<const float *>(inputs[0]),
+                                                        reinterpret_cast<const float *>(inputs[1]),
+                                                        reinterpret_cast<const int *>(inputs[2]),
+                                                        reinterpret_cast<const int *>(inputs[3]),
+                                                        reinterpret_cast<const int *>(inputs[4]),
+                                                        reinterpret_cast<const int *>(inputs[5]),
+                                                        reinterpret_cast<const int *>(inputs[6]),
+                                                        reinterpret_cast<__half *>(outputs[0]));
+        }
+        else{
+            // printf("bevpool : fp16 fp16\n");
+            bev_pool_v2_kernel<__half, __half><<<grid, block, 0, stream>>>(
+                                                        channel, 
+                                                        n_intervals,
+                                                        map_size,
+                                                        img_size,
+                                                        reinterpret_cast<const __half *>(inputs[0]),
+                                                        reinterpret_cast<const __half *>(inputs[1]),
+                                                        reinterpret_cast<const int *>(inputs[2]),
+                                                        reinterpret_cast<const int *>(inputs[3]),
+                                                        reinterpret_cast<const int *>(inputs[4]),
+                                                        reinterpret_cast<const int *>(inputs[5]),
+                                                        reinterpret_cast<const int *>(inputs[6]),
+                                                        reinterpret_cast<__half *>(outputs[0]));
+        }
+        break;
+    default: // should NOT be here
+        printf("\tUnsupport datatype!\n");
+    }
+    return 0;
+}
+
+void BEVPoolPlugin::destroy() noexcept {
+    delete this;
+    return;
+}
+
+int32_t BEVPoolPlugin::initialize() noexcept {
+    return 0;
+}
+
+void BEVPoolPlugin::terminate() noexcept {
+    return;
+}
+
+size_t BEVPoolPlugin::getSerializationSize() const noexcept {
+    return sizeof(m_);
+}
+
+void BEVPoolPlugin::serialize(void *buffer) const noexcept {
+    memcpy(buffer, &m_, sizeof(m_));
+    return;
+}
+
+void BEVPoolPlugin::setPluginNamespace(const char *pluginNamespace) noexcept {
+    namespace_ = pluginNamespace;
+    return;
+}
+
+const char *BEVPoolPlugin::getPluginNamespace() const noexcept {
+    return namespace_.c_str();
+}
+
+const char *BEVPoolPlugin::getPluginType() const noexcept {
+    return BEVPOOL_PLUGIN_NAME;
+}
+
+const char *BEVPoolPlugin::getPluginVersion() const noexcept {
+    return BEVPOOL_PLUGIN_VERSION;
+}
+
+void BEVPoolPlugin::attachToContext(cudnnContext *contextCudnn, cublasContext *contextCublas, 
+                                                        IGpuAllocator *gpuAllocator) noexcept {
+    return;
+}
+
+void BEVPoolPlugin::detachFromContext() noexcept {
+    return;
+}
+
+// class BEVPoolPluginCreator
+PluginFieldCollection    BEVPoolPluginCreator::fc_ {};
+std::vector<PluginField> BEVPoolPluginCreator::attr_;
+
+BEVPoolPluginCreator::BEVPoolPluginCreator() {
+    attr_.clear();
+    attr_.emplace_back(PluginField("bev_h", nullptr, PluginFieldType::kINT32, 1));
+    attr_.emplace_back(PluginField("bev_w", nullptr, PluginFieldType::kINT32, 1));
+    attr_.emplace_back(PluginField("n", nullptr, PluginFieldType::kINT32, 1));
+
+
+    fc_.nbFields = attr_.size();
+    fc_.fields   = attr_.data();
+}
+
+BEVPoolPluginCreator::~BEVPoolPluginCreator() {
+}
+
+
+IPluginV2DynamicExt *BEVPoolPluginCreator::createPlugin(const char *name, 
+                                    const PluginFieldCollection *fc) noexcept {
+    const PluginField *fields = fc->fields;
+
+    int bev_h = -1;
+    int bev_w = -1;
+    int n = -1;
+
+    for (int i = 0; i < fc->nbFields; ++i){
+        if(std::string(fc->fields[i].name) == std::string("bev_h")){
+            bev_h = *reinterpret_cast<const int *>(fc->fields[i].data);
+        }
+        else if(std::string(fc->fields[i].name) == std::string("bev_w")){
+            bev_w = *reinterpret_cast<const int *>(fc->fields[i].data);
+        }
+        else if(std::string(fc->fields[i].name) == std::string("n")){
+            n = *reinterpret_cast<const int *>(fc->fields[i].data);
+        }
+    }
+    BEVPoolPlugin *pObj = new BEVPoolPlugin(name, bev_h, bev_w, n);
+    pObj->setPluginNamespace(namespace_.c_str());
+    return pObj;
+}
+
+IPluginV2DynamicExt *BEVPoolPluginCreator::deserializePlugin(const char *name, 
+                                        const void *serialData, size_t serialLength) noexcept {
+    BEVPoolPlugin *pObj = new BEVPoolPlugin(name, serialData, serialLength);
+    pObj->setPluginNamespace(namespace_.c_str());
+    return pObj;
+}
+
+void BEVPoolPluginCreator::setPluginNamespace(const char *pluginNamespace) noexcept {
+    namespace_ = pluginNamespace;
+    return;
+}
+
+const char *BEVPoolPluginCreator::getPluginNamespace() const noexcept {
+    return namespace_.c_str();
+}
+
+const char *BEVPoolPluginCreator::getPluginName() const noexcept {
+    return BEVPOOL_PLUGIN_NAME;
+}
+
+const char *BEVPoolPluginCreator::getPluginVersion() const noexcept {
+    return BEVPOOL_PLUGIN_VERSION;
+}
+
+const PluginFieldCollection *BEVPoolPluginCreator::getFieldNames() noexcept {
+    return &fc_;
+}
+
+REGISTER_TENSORRT_PLUGIN(BEVPoolPluginCreator);
+
+} // namespace nvinfer1
diff --git a/perception/tensorrt_bevdet/src/cpu_jpegdecoder.cpp b/perception/tensorrt_bevdet/src/cpu_jpegdecoder.cpp
new file mode 100644
index 0000000000000..2df63c21701fc
--- /dev/null
+++ b/perception/tensorrt_bevdet/src/cpu_jpegdecoder.cpp
@@ -0,0 +1,73 @@
+#include <stddef.h>
+#include <stdio.h>
+#include <jpeglib.h>
+#include <setjmp.h>
+#include <vector>
+#include <cuda_runtime.h>
+#include "preprocess.hpp"
+
+#include "cpu_jpegdecoder.hpp"
+
+
+struct jpeg_error_handler {
+    struct jpeg_error_mgr pub;
+    jmp_buf setjmp_buffer;
+};
+
+int decode_jpeg(const std::vector<char>& buffer, uchar* output) {
+    struct jpeg_decompress_struct cinfo;
+    struct jpeg_error_handler jerr;
+    cinfo.err = jpeg_std_error(&jerr.pub);
+
+    jerr.pub.error_exit = [](j_common_ptr cinfo) {
+        jpeg_error_handler* myerr = (jpeg_error_handler*)cinfo->err;
+        longjmp(myerr->setjmp_buffer, 1);
+    };
+
+    if(setjmp(jerr.setjmp_buffer)) {
+        printf("\033[31mFailed to decompress jpeg: %s\033[0m\n", jerr.pub.jpeg_message_table[jerr.pub.msg_code]);
+        jpeg_destroy_decompress(&cinfo);
+        return EXIT_FAILURE ;
+    }
+    jpeg_create_decompress(&cinfo);
+
+    if(buffer.size() == 0) {
+        printf("buffer size is 0");
+        return EXIT_FAILURE;
+    }
+    jpeg_mem_src(&cinfo, (uchar*)buffer.data(), buffer.size());
+    if(jpeg_read_header(&cinfo, TRUE) != JPEG_HEADER_OK) {
+        printf("\033[31mFailed to read jpeg header\033[0m\n");
+        return EXIT_FAILURE;
+    }
+    if(jpeg_start_decompress(&cinfo) != TRUE) {
+        printf("\033[31mFailed to start decompress\033[0m\n");
+        return EXIT_FAILURE;
+    }
+
+    while (cinfo.output_scanline < cinfo.output_height) {
+        auto row = cinfo.output_scanline;
+        uchar* ptr = output + row * cinfo.image_width * 3;
+        jpeg_read_scanlines(&cinfo, (JSAMPARRAY)&ptr, 1);
+    }
+    jpeg_finish_decompress(&cinfo);
+    jpeg_destroy_decompress(&cinfo);
+    return EXIT_SUCCESS ;
+}
+
+int decode_cpu(const std::vector<std::vector<char>> &files_data, uchar* out_imgs, size_t width, size_t height) {
+    uchar* temp = new uchar[width * height * 3];
+    uchar* temp_gpu = nullptr;
+    CHECK_CUDA(cudaMalloc(&temp_gpu, width * height * 3));
+    for (size_t i = 0; i < files_data.size(); i++) {
+        if (decode_jpeg(files_data[i], temp)) {
+            return EXIT_FAILURE;
+        }
+        CHECK_CUDA(cudaMemcpy(temp_gpu, temp, width * height * 3, cudaMemcpyHostToDevice));
+        convert_RGBHWC_to_BGRCHW(temp_gpu, out_imgs + i * width * height * 3, 3, height, width);
+        CHECK_CUDA(cudaDeviceSynchronize());
+    }
+    CHECK_CUDA(cudaFree(temp_gpu));
+    delete[] temp;
+    return EXIT_SUCCESS;
+}
diff --git a/perception/tensorrt_bevdet/src/data.cpp b/perception/tensorrt_bevdet/src/data.cpp
new file mode 100644
index 0000000000000..cd9a9de73bfd3
--- /dev/null
+++ b/perception/tensorrt_bevdet/src/data.cpp
@@ -0,0 +1,195 @@
+#include <cstdio>
+#include <stdlib.h>
+
+#include <iostream>
+#include <fstream>
+#include <chrono>
+
+#include "data.hpp"
+#include "cpu_jpegdecoder.hpp"
+
+using std::chrono::high_resolution_clock;
+using std::chrono::duration;
+
+camParams::camParams(const YAML::Node &config, int n, std::vector<std::string> &cams_name) :
+                                                                    N_img(n){
+    
+    if((size_t)n != cams_name.size()){
+        std::cerr << "Error! Need " << n << " camera param, bug given " << cams_name.size() << " camera names!" << std::endl;
+    }
+    ego2global_rot = fromYamlQuater(config["ego2global_rotation"]);
+    ego2global_trans = fromYamlTrans(config["ego2global_translation"]);
+
+    lidar2ego_rot = fromYamlQuater(config["lidar2ego_rotation"]);
+    lidar2ego_trans = fromYamlTrans(config["lidar2ego_translation"]);
+
+    timestamp = config["timestamp"].as<unsigned long long>();
+    scene_token = config["scene_token"].as<std::string>();
+
+    imgs_file.clear();
+
+    cams_intrin.clear();
+    cams2ego_rot.clear();
+    cams2ego_trans.clear();
+    
+    for(std::string name : cams_name){
+        imgs_file.push_back("." + config["cams"][name]["data_path"].as<std::string>());
+
+        //
+        cams_intrin.push_back(fromYamlMatrix3f(config["cams"][name]["cam_intrinsic"]));
+        cams2ego_rot.push_back(fromYamlQuater(config["cams"][name]["sensor2ego_rotation"]));
+        cams2ego_trans.push_back(fromYamlTrans(config["cams"][name]["sensor2ego_translation"]));
+        //
+
+    }
+}
+
+
+DataLoader::DataLoader(int _n_img, 
+                       int _h, 
+                       int _w,
+                       const std::string &_data_infos_path,
+                       const std::vector<std::string> &_cams_name,
+                       bool _sep):
+                       n_img(_n_img),
+                       cams_intrin(_n_img), 
+                       cams2ego_rot(_n_img),
+                       cams2ego_trans(_n_img),
+#ifdef __HAVE_NVJPEG__
+                       nvdecoder(_n_img, DECODE_BGR),  
+#endif
+                       img_h(_h),
+                       img_w(_w),
+                       cams_name(_cams_name),
+                       data_infos_path(_data_infos_path),
+                       separate(_sep) {
+    
+
+    YAML::Node temp_seq = YAML::LoadFile(data_infos_path + "/time_sequence.yaml");
+    time_sequence = temp_seq["time_sequence"].as<std::vector<int>>();
+    sample_num = time_sequence.size();
+
+    cams_param.resize(sample_num);
+
+    if(separate == false){
+        YAML::Node infos = YAML::LoadFile(data_infos_path + "/samples_info.yaml");
+
+        for(size_t i = 0; i < cams_name.size(); i++){
+            cams_intrin[i] = fromYamlMatrix3f(infos[0]["cams"][cams_name[i]]["cam_intrinsic"]);
+            cams2ego_rot[i] = fromYamlQuater(infos[0]["cams"][cams_name[i]]
+                                                            ["sensor2ego_rotation"]);
+            cams2ego_trans[i] = fromYamlTrans(infos[0]["cams"][cams_name[i]]
+                                                            ["sensor2ego_translation"]);
+        }
+        lidar2ego_rot = fromYamlQuater(infos[0]["lidar2ego_rotation"]);
+        lidar2ego_trans = fromYamlTrans(infos[0]["lidar2ego_translation"]);
+
+        for(int i = 0; i < sample_num; i++){
+            cams_param[i] = camParams(infos[i], n_img, cams_name);
+        }
+    }
+    else{
+        YAML::Node config0 = YAML::LoadFile(data_infos_path + "/samples_info/sample0000.yaml");
+
+        for(size_t i = 0; i < cams_name.size(); i++){
+            cams_intrin[i] = fromYamlMatrix3f(config0["cams"][cams_name[i]]["cam_intrinsic"]);
+            cams2ego_rot[i] = fromYamlQuater(config0["cams"][cams_name[i]]
+                                                            ["sensor2ego_rotation"]);
+            cams2ego_trans[i] = fromYamlTrans(config0["cams"][cams_name[i]]
+                                                            ["sensor2ego_translation"]);
+        }
+        lidar2ego_rot = fromYamlQuater(config0["lidar2ego_rotation"]);
+        lidar2ego_trans = fromYamlTrans(config0["lidar2ego_translation"]);
+    }
+
+    CHECK_CUDA(cudaMalloc((void**)&imgs_dev, n_img * img_h * img_w * 3 * sizeof(uchar)));
+}
+
+
+const camsData& DataLoader::data(int idx, bool time_order){
+    if(time_order){
+        idx = time_sequence[idx];
+    }
+    printf("------time_sequence idx : %d ---------\n", idx);
+    if(separate == false){
+        cams_data.param = cams_param[idx];
+    }
+    else{
+        char str_idx[50];
+        sprintf(str_idx, "/samples_info/sample%04d.yaml", idx);
+        YAML::Node config_idx = YAML::LoadFile(data_infos_path + str_idx);
+        cams_data.param = camParams(config_idx, n_img, cams_name);
+    }
+    imgs_data.clear();
+    if(read_sample(cams_data.param.imgs_file, imgs_data)){
+        exit(1);
+    }
+#ifdef __HAVE_NVJPEG__
+    nvdecoder.decode(imgs_data, imgs_dev);
+#else
+    decode_cpu(imgs_data, imgs_dev, img_w, img_h);
+    printf("decode on cpu!\n");
+#endif
+    cams_data.imgs_dev = imgs_dev;
+    return cams_data;
+}
+
+DataLoader::~DataLoader(){
+    CHECK_CUDA(cudaFree(imgs_dev));
+}
+
+
+int read_image(std::string &image_names, std::vector<char> &raw_data){
+
+    std::ifstream input(image_names.c_str(), std::ios::in | std::ios::binary | std::ios::ate);
+
+    if (!(input.is_open())){
+        std::cerr << "Cannot open image: " << image_names << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    std::streamsize file_size = input.tellg();
+    input.seekg(0, std::ios::beg);
+    if (raw_data.size() < (size_t)file_size){
+        raw_data.resize(file_size);
+    }
+    if (!input.read(raw_data.data(), file_size)){
+        std::cerr << "Cannot read from file: " << image_names << std::endl;
+        return EXIT_FAILURE;
+    }
+    return EXIT_SUCCESS;
+}
+
+int read_sample(std::vector<std::string> &imgs_file, std::vector<std::vector<char>> &imgs_data){
+
+    imgs_data.resize(imgs_file.size());
+
+    for(size_t i = 0; i < imgs_data.size(); i++){
+        if(read_image(imgs_file[i], imgs_data[i])){
+            return EXIT_FAILURE;
+        }
+    }
+    return EXIT_SUCCESS;
+}
+
+
+Eigen::Translation3f fromYamlTrans(YAML::Node x){
+    std::vector<float> trans = x.as<std::vector<float>>();
+    return Eigen::Translation3f(trans[0], trans[1], trans[2]);
+}
+
+Eigen::Quaternion<float> fromYamlQuater(YAML::Node x){
+    std::vector<float> quater = x.as<std::vector<float>>();
+    return Eigen::Quaternion<float>(quater[0], quater[1], quater[2], quater[3]);
+}
+
+Eigen::Matrix3f fromYamlMatrix3f(YAML::Node x){
+    std::vector<std::vector<float>> m = x.as<std::vector<std::vector<float>>>();
+    Eigen::Matrix3f mat;
+    for(size_t i = 0; i < m.size(); i++){
+        for(size_t j = 0; j < m[0].size(); j++){
+            mat(i, j) = m[i][j];
+        }
+    }
+    return mat;
+}
diff --git a/perception/tensorrt_bevdet/src/gatherbev_plugin.cu b/perception/tensorrt_bevdet/src/gatherbev_plugin.cu
new file mode 100644
index 0000000000000..16972cf8891bb
--- /dev/null
+++ b/perception/tensorrt_bevdet/src/gatherbev_plugin.cu
@@ -0,0 +1,296 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "gatherbev_plugin.hpp"
+#include "common.hpp"
+#include <cuda_fp16.h>
+#include <cuda_runtime_api.h>
+
+// input[0] == adj_feat   b x 8 x 80 x 128 x 128
+// input[1] == curr_feat  b x 80 x 128 x 128
+// input[2] == flag       b x 1
+// out[0]                 b x (8+1)*80 x 128 x 128
+template<typename T>
+__global__ void copy_feat_kernel(int nthreads, // b * (adj_num + 1) * map_size
+                                int adj_num,
+                                int channel,
+                                int map_size,
+                                const T* adj_feats,
+                                const T* curr_feat,
+                                const int* flag,
+                                T* out_feats){
+    CUDA_1D_KERNEL_LOOP(idx, nthreads){
+        int b = idx / ((adj_num + 1) * map_size);
+        int n = (idx / map_size) % (adj_num + 1);
+        int m = idx % map_size;
+
+        int start = b * (adj_num + 1) * channel * map_size + n * channel * map_size + m;
+        int end = start + channel * map_size;
+        for(int i = start, c = 0; i < end; i += map_size, c++){
+            if(flag[b] == 0 || n == 0){
+                out_feats[i] = curr_feat[b * channel * map_size + c * map_size + m];
+            }
+            else{
+                out_feats[i] = adj_feats[i - channel * map_size];
+            }
+        }
+    }
+}
+
+
+namespace nvinfer1
+{
+// class GatherBEVPlugin
+GatherBEVPlugin::GatherBEVPlugin(const std::string &name):
+    name_(name){
+}
+
+GatherBEVPlugin::GatherBEVPlugin(const std::string &name, const void *buffer, size_t length):
+    name_(name){
+    memcpy(&m_, buffer, sizeof(m_));
+}
+
+GatherBEVPlugin::~GatherBEVPlugin(){
+}
+
+IPluginV2DynamicExt *GatherBEVPlugin::clone() const noexcept {
+    auto p = new GatherBEVPlugin(name_, &m_, sizeof(m_));
+    p->setPluginNamespace(namespace_.c_str());
+    return p;
+}
+
+int32_t GatherBEVPlugin::getNbOutputs() const noexcept {
+    return 1;
+}
+ 
+DataType GatherBEVPlugin::getOutputDataType(int32_t index, DataType const *inputTypes, 
+                                                                int32_t nbInputs) const noexcept {
+    return inputTypes[0];  // 与adj_feat一致
+}
+
+DimsExprs GatherBEVPlugin::getOutputDimensions(int32_t outputIndex, const DimsExprs *inputs, 
+                                        int32_t nbInputs, IExprBuilder &exprBuilder) noexcept {
+  // input[0] == adj_feat   b x 8 x 80 x 128 x 128
+  // input[1] == curr_feat  b x 80 x 128 x 128
+  // input[2] == flag       b x 1
+  // out[0]                 b x (8+1)*80 x 128 x 128
+    DimsExprs ret;
+    ret.nbDims = inputs[0].nbDims - 1;
+
+    IDimensionExpr const *n_feat = exprBuilder.operation(DimensionOperation::kSUM, 
+                                                        *inputs[0].d[1],
+                                                        *exprBuilder.constant(1));
+    ret.d[0] = inputs[0].d[0];
+    ret.d[1] = exprBuilder.operation(DimensionOperation::kPROD, *n_feat, *inputs[0].d[2]);
+    ret.d[2] = inputs[0].d[3];
+    ret.d[3] = inputs[0].d[4];
+
+    return ret; 
+}
+
+bool GatherBEVPlugin::supportsFormatCombination(int32_t pos, const PluginTensorDesc *inOut,
+                                                    int32_t nbInputs, int32_t nbOutputs) noexcept {
+    // adj_feat
+    if(pos == 0){
+        return (inOut[pos].type == DataType::kFLOAT || inOut[pos].type == DataType::kHALF) &&
+                inOut[pos].format == TensorFormat::kLINEAR;
+    }
+    else if(pos == 1){ // curr_feat
+        return inOut[0].type == inOut[1].type && inOut[pos].format == TensorFormat::kLINEAR;
+    }
+    else if(pos == 3){ // out
+        return inOut[0].type == inOut[3].type && inOut[pos].format == TensorFormat::kLINEAR;
+    }
+    else if(pos == 2){
+        return inOut[pos].type == DataType::kINT32 && inOut[pos].format == TensorFormat::kLINEAR;
+    }
+    return false;
+}
+
+void GatherBEVPlugin::configurePlugin(const DynamicPluginTensorDesc *in, int32_t nbInputs, 
+                                    const DynamicPluginTensorDesc *out, int32_t nbOutputs) noexcept {
+    return;
+}
+
+size_t GatherBEVPlugin::getWorkspaceSize(const PluginTensorDesc *inputs, int32_t nbInputs, 
+                                const PluginTensorDesc *outputs, int32_t nbOutputs) const noexcept {
+    return 0;
+}
+
+int32_t GatherBEVPlugin::enqueue(const PluginTensorDesc *inputDesc, const PluginTensorDesc *outputDesc,
+    const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept {
+
+  // input[0] == adj_feat   b x 8 x 80 x 128 x 128
+  // input[1] == curr_feat  b x 80 x 128 x 128
+  // input[2] == flag       b x 1
+  // out[0]                 b x (8+1)*80 x 128 x 128
+
+    // int nthreads, // b * (adj_num + 1) * map_size
+    // int adj_num,
+    // int channel,
+    // int map_size,
+
+    // int flag = 0;
+    // CHECK_CUDA(cudaMemcpy(&flag, inputs[2], sizeof(int), cudaMemcpyDeviceToHost));
+
+    int b = inputDesc[0].dims.d[0];
+    int adj_num = inputDesc[0].dims.d[1];
+    int map_size = inputDesc[0].dims.d[3] * inputDesc[0].dims.d[4];
+    int channel = inputDesc[0].dims.d[2];
+
+    int feat_step = inputDesc[1].dims.d[1] * inputDesc[1].dims.d[2] * inputDesc[1].dims.d[3];
+
+    int nthreads = b * (adj_num + 1) * map_size;
+
+    dim3 grid(GET_BLOCKS(nthreads));
+    dim3 block(NUM_THREADS);
+    // printf("GatherBEV input adj_feats %s\n", dataTypeToString(inputDesc[0].type).c_str());
+    // printf("GatherBEV input curr_feat %s\n", dataTypeToString(inputDesc[1].type).c_str());
+    // printf("GatherBEV output bevfeats %s\n", dataTypeToString(outputDesc[0].type).c_str());
+
+
+    switch (int(outputDesc[0].type))
+    {
+    case int(DataType::kFLOAT):
+        // printf("gather : fp32\n");
+        copy_feat_kernel<<<grid, block, 0, stream>>>(nthreads,
+                                                    adj_num,
+                                                    channel,
+                                                    map_size,
+                                                    reinterpret_cast<const float*>(inputs[0]),
+                                                    reinterpret_cast<const float*>(inputs[1]),
+                                                    reinterpret_cast<const int*>(inputs[2]),
+                                                    reinterpret_cast<float*>(outputs[0]));
+        
+
+        break;
+    case int(DataType::kHALF):
+        // printf("gather : fp16\n");
+        copy_feat_kernel<<<grid, block, 0, stream>>>(nthreads,
+                                                    adj_num,
+                                                    channel,
+                                                    map_size,
+                                                    reinterpret_cast<const __half*>(inputs[0]),
+                                                    reinterpret_cast<const __half*>(inputs[1]),
+                                                    reinterpret_cast<const int*>(inputs[2]),
+                                                    reinterpret_cast<__half*>(outputs[0]));
+        break;
+    default: // should NOT be here
+        printf("\tUnsupport datatype!\n");
+    }
+    return 0;
+}
+
+void GatherBEVPlugin::destroy() noexcept {
+    delete this;
+    return;
+}
+
+int32_t GatherBEVPlugin::initialize() noexcept {
+    return 0;
+}
+
+void GatherBEVPlugin::terminate() noexcept {
+    return;
+}
+
+size_t GatherBEVPlugin::getSerializationSize() const noexcept {
+    return sizeof(m_);
+}
+
+void GatherBEVPlugin::serialize(void *buffer) const noexcept {
+    memcpy(buffer, &m_, sizeof(m_));
+    return;
+}
+
+void GatherBEVPlugin::setPluginNamespace(const char *pluginNamespace) noexcept {
+    namespace_ = pluginNamespace;
+    return;
+}
+
+const char *GatherBEVPlugin::getPluginNamespace() const noexcept {
+    return namespace_.c_str();
+}
+
+const char *GatherBEVPlugin::getPluginType() const noexcept {
+    return GATHERBEV_PLUGIN_NAME;
+}
+
+const char *GatherBEVPlugin::getPluginVersion() const noexcept {
+    return GATHERBEV_PLUGIN_VERSION;
+}
+
+void GatherBEVPlugin::attachToContext(cudnnContext *contextCudnn, cublasContext *contextCublas, 
+                                                        IGpuAllocator *gpuAllocator) noexcept {
+    return;
+}
+
+void GatherBEVPlugin::detachFromContext() noexcept {
+    return;
+}
+
+// class GatherBEVPluginCreator
+PluginFieldCollection    GatherBEVPluginCreator::fc_ {};
+std::vector<PluginField> GatherBEVPluginCreator::attr_;
+
+GatherBEVPluginCreator::GatherBEVPluginCreator() {
+    attr_.clear();
+    fc_.nbFields = attr_.size();
+    fc_.fields   = attr_.data();
+}
+
+GatherBEVPluginCreator::~GatherBEVPluginCreator() {
+}
+
+
+IPluginV2DynamicExt *GatherBEVPluginCreator::createPlugin(const char *name, 
+                                    const PluginFieldCollection *fc) noexcept {
+    GatherBEVPlugin *pObj = new GatherBEVPlugin(name);
+    pObj->setPluginNamespace(namespace_.c_str());
+    return pObj;
+}
+
+IPluginV2DynamicExt *GatherBEVPluginCreator::deserializePlugin(const char *name, 
+                                        const void *serialData, size_t serialLength) noexcept {
+    GatherBEVPlugin *pObj = new GatherBEVPlugin(name, serialData, serialLength);
+    pObj->setPluginNamespace(namespace_.c_str());
+    return pObj;
+}
+
+void GatherBEVPluginCreator::setPluginNamespace(const char *pluginNamespace) noexcept {
+    namespace_ = pluginNamespace;
+    return;
+}
+
+const char *GatherBEVPluginCreator::getPluginNamespace() const noexcept {
+    return namespace_.c_str();
+}
+
+const char *GatherBEVPluginCreator::getPluginName() const noexcept {
+    return GATHERBEV_PLUGIN_NAME;
+}
+
+const char *GatherBEVPluginCreator::getPluginVersion() const noexcept {
+    return GATHERBEV_PLUGIN_VERSION;
+}
+
+const PluginFieldCollection *GatherBEVPluginCreator::getFieldNames() noexcept {
+    return &fc_;
+}
+
+REGISTER_TENSORRT_PLUGIN(GatherBEVPluginCreator);
+
+} // namespace nvinfer1
diff --git a/perception/tensorrt_bevdet/src/iou3d_nms.cu b/perception/tensorrt_bevdet/src/iou3d_nms.cu
new file mode 100644
index 0000000000000..ef532e43049f6
--- /dev/null
+++ b/perception/tensorrt_bevdet/src/iou3d_nms.cu
@@ -0,0 +1,596 @@
+/*
+3D IoU Calculation and Rotated NMS(modified from 2D NMS written by others)
+Written by Shaoshuai Shi
+All Rights Reserved 2019-2022.
+*/
+
+#include "iou3d_nms.hpp"
+
+struct Point {
+  float x, y;
+  __device__ Point() {}
+  __device__ Point(double _x, double _y) { x = _x, y = _y; }
+
+  __device__ void set(float _x, float _y) {
+    x = _x;
+    y = _y;
+  }
+
+  __device__ Point operator+(const Point& b) const {
+    return Point(x + b.x, y + b.y);
+  }
+
+  __device__ Point operator-(const Point& b) const {
+    return Point(x - b.x, y - b.y);
+  }
+};
+
+__device__ inline float cross(const Point& a, const Point& b) {
+  return a.x * b.y - a.y * b.x;
+}
+
+__device__ inline float cross(const Point& p1, const Point& p2, const Point& p0) {
+  return (p1.x - p0.x) * (p2.y - p0.y) - (p2.x - p0.x) * (p1.y - p0.y);
+}
+
+__device__ int check_rect_cross(const Point& p1, const Point& p2,
+                                const Point& q1, const Point& q2) {
+  int ret = min(p1.x, p2.x) <= max(q1.x, q2.x) &&
+            min(q1.x, q2.x) <= max(p1.x, p2.x) &&
+            min(p1.y, p2.y) <= max(q1.y, q2.y) &&
+            min(q1.y, q2.y) <= max(p1.y, p2.y);
+  return ret;
+}
+
+__device__ inline int check_in_box2d(const float* box, const Point& p) {
+  // params: (7) [x, y, z, dx, dy, dz, heading]
+  const float kMargin = 1e-2;
+
+  float center_x = box[0], center_y = box[1];
+  // rotate the point in the opposite direction of box
+  float angle_cos = cos(-box[6]);
+  float angle_sin = sin(-box[6]);
+  float rot_x = (p.x - center_x) * angle_cos + (p.y - center_y) * (-angle_sin);
+  float rot_y = (p.x - center_x) * angle_sin + (p.y - center_y) * angle_cos;
+
+  return (fabs(rot_x) < box[3] / 2 + kMargin &&
+          fabs(rot_y) < box[4] / 2 + kMargin);
+}
+
+__device__ inline int intersection(const Point& p1, const Point& p0,
+                                   const Point& q1, const Point& q0,
+                                   Point& ans) {
+  // fast exclusion
+  if (check_rect_cross(p0, p1, q0, q1) == 0) return 0;
+
+  // check cross standing
+  float s1 = cross(q0, p1, p0);
+  float s2 = cross(p1, q1, p0);
+  float s3 = cross(p0, q1, q0);
+  float s4 = cross(q1, p1, q0);
+
+  if (!(s1 * s2 > 0 && s3 * s4 > 0)) return 0;
+
+  // calculate intersection of two lines
+  float s5 = cross(q1, p1, p0);
+  if (fabs(s5 - s1) > EPS) {
+    ans.x = (s5 * q0.x - s1 * q1.x) / (s5 - s1);
+    ans.y = (s5 * q0.y - s1 * q1.y) / (s5 - s1);
+
+  } else {
+    float a0 = p0.y - p1.y, b0 = p1.x - p0.x, c0 = p0.x * p1.y - p1.x * p0.y;
+    float a1 = q0.y - q1.y, b1 = q1.x - q0.x, c1 = q0.x * q1.y - q1.x * q0.y;
+    float D = a0 * b1 - a1 * b0;
+
+    ans.x = (b0 * c1 - b1 * c0) / D;
+    ans.y = (a1 * c0 - a0 * c1) / D;
+  }
+
+  return 1;
+}
+
+__device__ inline void rotate_around_center(const Point& center,
+                                            const float angle_cos,
+                                            const float angle_sin, 
+                                            Point& p) {
+  float new_x = (p.x - center.x) * angle_cos + (p.y - center.y) * (-angle_sin) + center.x;
+  float new_y = (p.x - center.x) * angle_sin + (p.y - center.y) * angle_cos + center.y;
+  p.set(new_x, new_y);
+}
+
+__device__ inline int point_cmp(const Point& a, const Point& b,
+                                const Point& center) {
+  return atan2(a.y - center.y, a.x - center.x) >
+         atan2(b.y - center.y, b.x - center.x);
+}
+
+__device__ inline float box_overlap(const float* box_a, const float* box_b) {
+  // params box_a: [x, y, z, dx, dy, dz, heading]
+  // params box_b: [x, y, z, dx, dy, dz, heading]
+
+  float a_angle = box_a[6], b_angle = box_b[6];
+  float a_dx_half = box_a[3] / 2, b_dx_half = box_b[3] / 2,
+        a_dy_half = box_a[4] / 2, b_dy_half = box_b[4] / 2;
+  float a_x1 = box_a[0] - a_dx_half, a_y1 = box_a[1] - a_dy_half;
+  float a_x2 = box_a[0] + a_dx_half, a_y2 = box_a[1] + a_dy_half;
+  float b_x1 = box_b[0] - b_dx_half, b_y1 = box_b[1] - b_dy_half;
+  float b_x2 = box_b[0] + b_dx_half, b_y2 = box_b[1] + b_dy_half;
+
+  Point center_a(box_a[0], box_a[1]);
+  Point center_b(box_b[0], box_b[1]);
+
+#ifdef DEBUG
+  printf(
+      "a: (%.3f, %.3f, %.3f, %.3f, %.3f), b: (%.3f, %.3f, %.3f, %.3f, %.3f)\n",
+      a_x1, a_y1, a_x2, a_y2, a_angle, b_x1, b_y1, b_x2, b_y2, b_angle);
+  printf("center a: (%.3f, %.3f), b: (%.3f, %.3f)\n", center_a.x, center_a.y,
+         center_b.x, center_b.y);
+#endif
+
+  Point box_a_corners[5];
+  box_a_corners[0].set(a_x1, a_y1);
+  box_a_corners[1].set(a_x2, a_y1);
+  box_a_corners[2].set(a_x2, a_y2);
+  box_a_corners[3].set(a_x1, a_y2);
+
+  Point box_b_corners[5];
+  box_b_corners[0].set(b_x1, b_y1);
+  box_b_corners[1].set(b_x2, b_y1);
+  box_b_corners[2].set(b_x2, b_y2);
+  box_b_corners[3].set(b_x1, b_y2);
+
+  // get oriented corners
+  float a_angle_cos = cos(a_angle), a_angle_sin = sin(a_angle);
+  float b_angle_cos = cos(b_angle), b_angle_sin = sin(b_angle);
+
+  for (int k = 0; k < 4; k++) {
+#ifdef DEBUG
+    printf("before corner %d: a(%.3f, %.3f), b(%.3f, %.3f) \n", k,
+           box_a_corners[k].x, box_a_corners[k].y, box_b_corners[k].x,
+           box_b_corners[k].y);
+#endif
+    rotate_around_center(center_a, a_angle_cos, a_angle_sin, box_a_corners[k]);
+    rotate_around_center(center_b, b_angle_cos, b_angle_sin, box_b_corners[k]);
+#ifdef DEBUG
+    printf("corner %d: a(%.3f, %.3f), b(%.3f, %.3f) \n", k, box_a_corners[k].x,
+           box_a_corners[k].y, box_b_corners[k].x, box_b_corners[k].y);
+#endif
+  }
+
+  box_a_corners[4] = box_a_corners[0];
+  box_b_corners[4] = box_b_corners[0];
+
+  // get intersection of lines
+  Point cross_points[16];
+  Point poly_center;
+  int cnt = 0, flag = 0;
+
+  poly_center.set(0, 0);
+  for (int i = 0; i < 4; i++) {
+    for (int j = 0; j < 4; j++) {
+      flag = intersection(box_a_corners[i + 1], box_a_corners[i],
+                          box_b_corners[j + 1], box_b_corners[j],
+                          cross_points[cnt]);
+      if (flag) {
+        poly_center = poly_center + cross_points[cnt];
+        cnt++;
+#ifdef DEBUG
+        printf(
+            "Cross points (%.3f, %.3f): a(%.3f, %.3f)->(%.3f, %.3f), b(%.3f, "
+            "%.3f)->(%.3f, %.3f) \n",
+            cross_points[cnt - 1].x, cross_points[cnt - 1].y,
+            box_a_corners[i].x, box_a_corners[i].y, box_a_corners[i + 1].x,
+            box_a_corners[i + 1].y, box_b_corners[i].x, box_b_corners[i].y,
+            box_b_corners[i + 1].x, box_b_corners[i + 1].y);
+#endif
+      }
+    }
+  }
+
+  // check corners
+  for (int k = 0; k < 4; k++) {
+    if (check_in_box2d(box_a, box_b_corners[k])) {
+      poly_center = poly_center + box_b_corners[k];
+      cross_points[cnt] = box_b_corners[k];
+      cnt++;
+#ifdef DEBUG
+      printf("b corners in a: corner_b(%.3f, %.3f)", cross_points[cnt - 1].x,
+             cross_points[cnt - 1].y);
+#endif
+    }
+    if (check_in_box2d(box_b, box_a_corners[k])) {
+      poly_center = poly_center + box_a_corners[k];
+      cross_points[cnt] = box_a_corners[k];
+      cnt++;
+#ifdef DEBUG
+      printf("a corners in b: corner_a(%.3f, %.3f)", cross_points[cnt - 1].x,
+             cross_points[cnt - 1].y);
+#endif
+    }
+  }
+
+  poly_center.x /= cnt;
+  poly_center.y /= cnt;
+
+  // sort the points of polygon
+  Point temp;
+  for (int j = 0; j < cnt - 1; j++) {
+    for (int i = 0; i < cnt - j - 1; i++) {
+      if (point_cmp(cross_points[i], cross_points[i + 1], poly_center)) {
+        temp = cross_points[i];
+        cross_points[i] = cross_points[i + 1];
+        cross_points[i + 1] = temp;
+      }
+    }
+  }
+
+#ifdef DEBUG
+  printf("cnt=%d\n", cnt);
+  for (int i = 0; i < cnt; i++) {
+    printf("All cross point %d: (%.3f, %.3f)\n", i, cross_points[i].x,
+           cross_points[i].y);
+  }
+#endif
+
+  // get the overlap areas
+  float area = 0;
+  for (int k = 0; k < cnt - 1; k++) {
+    area += cross(cross_points[k] - cross_points[0],
+                  cross_points[k + 1] - cross_points[0]);
+  }
+
+  return fabs(area) / 2.0;
+}
+
+__device__ inline float iou_bev(const float* box_a, const float* box_b) {
+  // params box_a: [x, y, z, dx, dy, dz, heading]
+  // params box_b: [x, y, z, dx, dy, dz, heading]
+  float sa = box_a[3] * box_a[4];
+  float sb = box_b[3] * box_b[4];
+  float s_overlap = box_overlap(box_a, box_b);
+  return s_overlap / fmaxf(sa + sb - s_overlap, EPS);
+}
+
+__device__ inline float iou_normal(float const* const a, float const* const b) {
+  // params: a: [x, y, z, dx, dy, dz, heading]
+  // params: b: [x, y, z, dx, dy, dz, heading]
+  float left = fmaxf(a[0] - a[3] / 2, b[0] - b[3] / 2),
+        right = fminf(a[0] + a[3] / 2, b[0] + b[3] / 2);
+  float top = fmaxf(a[1] - a[4] / 2, b[1] - b[4] / 2),
+        bottom = fminf(a[1] + a[4] / 2, b[1] + b[4] / 2);
+  float width = fmaxf(right - left, 0.f), height = fmaxf(bottom - top, 0.f);
+  float interS = width * height;
+  float Sa = a[3] * a[4];
+  float Sb = b[3] * b[4];
+  return interS / fmaxf(Sa + Sb - interS, EPS);
+}
+
+__global__ void boxes_overlap_kernel(const int num_a, const float* boxes_a,
+                                     const int num_b, const float* boxes_b,
+                                     float* ans_overlap) {
+  // params boxes_a: (N, 7) [x, y, z, dx, dy, dz, heading]
+  // params boxes_b: (M, 7) [x, y, z, dx, dy, dz, heading]
+  const int a_idx = blockIdx.y * THREADS_PER_BLOCK + threadIdx.y;
+  const int b_idx = blockIdx.x * THREADS_PER_BLOCK + threadIdx.x;
+
+  if (a_idx >= num_a || b_idx >= num_b) {
+    return;
+  }
+  const float* cur_box_a = boxes_a + a_idx * kBoxBlockSize;
+  const float* cur_box_b = boxes_b + b_idx * kBoxBlockSize;
+  float s_overlap = box_overlap(cur_box_a, cur_box_b);
+  ans_overlap[a_idx * num_b + b_idx] = s_overlap;
+}
+
+__global__ void boxes_iou_bev_kernel(const int num_a, const float* boxes_a,
+                                     const int num_b, const float* boxes_b,
+                                     float* ans_iou) {
+  // params boxes_a: (N, 7) [x, y, z, dx, dy, dz, heading]
+  // params boxes_b: (M, 7) [x, y, z, dx, dy, dz, heading]
+  const int a_idx = blockIdx.y * THREADS_PER_BLOCK + threadIdx.y;
+  const int b_idx = blockIdx.x * THREADS_PER_BLOCK + threadIdx.x;
+
+  if (a_idx >= num_a || b_idx >= num_b) {
+    return;
+  }
+
+  const float* cur_box_a = boxes_a + a_idx * kBoxBlockSize;
+  const float* cur_box_b = boxes_b + b_idx * kBoxBlockSize;
+  float cur_iou_bev = iou_bev(cur_box_a, cur_box_b);
+  ans_iou[a_idx * num_b + b_idx] = cur_iou_bev;
+}
+
+__global__ void RotatedNmsKernel(const int boxes_num,
+                                 const float nms_overlap_thresh,
+                                 const float* boxes, 
+                                 unsigned long long* mask) {
+  // params: boxes (N, 7) [x, y, z, dx, dy, dz, heading]
+  // params: mask (N, N/THREADS_PER_BLOCK_NMS)
+
+  const int row_start = blockIdx.y;
+  const int col_start = blockIdx.x;
+
+  // if (row_start > col_start) return;
+
+  const int row_size = fminf(boxes_num - row_start * THREADS_PER_BLOCK_NMS, THREADS_PER_BLOCK_NMS);
+  const int col_size = fminf(boxes_num - col_start * THREADS_PER_BLOCK_NMS, THREADS_PER_BLOCK_NMS);
+
+  __shared__ float block_boxes[THREADS_PER_BLOCK_NMS * kBoxBlockSize];
+
+  if (threadIdx.x < col_size) {
+    block_boxes[threadIdx.x * kBoxBlockSize + 0] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 0];
+    block_boxes[threadIdx.x * kBoxBlockSize + 1] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 1];
+    block_boxes[threadIdx.x * kBoxBlockSize + 2] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 2];
+    block_boxes[threadIdx.x * kBoxBlockSize + 3] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 3];
+    block_boxes[threadIdx.x * kBoxBlockSize + 4] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 4];
+    block_boxes[threadIdx.x * kBoxBlockSize + 5] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 5];
+    block_boxes[threadIdx.x * kBoxBlockSize + 6] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 6];
+  }
+  __syncthreads();
+
+  if (threadIdx.x < row_size) {
+    const int cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x;
+    const float* cur_box = boxes + cur_box_idx * kBoxBlockSize;
+
+    int i = 0;
+    unsigned long long t = 0;
+    int start = 0;
+    if (row_start == col_start) {
+      start = threadIdx.x + 1;
+    }
+    for (i = start; i < col_size; i++) {
+      if (iou_bev(cur_box, block_boxes + i * kBoxBlockSize) > nms_overlap_thresh) {
+        t |= 1ULL << i;
+      }
+    }
+    const int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS);
+    mask[cur_box_idx * col_blocks + col_start] = t;
+  }
+}
+
+__global__ void NmsKernel(const int boxes_num,
+                          const float nms_overlap_thresh,
+                          const float* boxes,
+                          unsigned long long* mask) {
+  // params: boxes (N, 7) [x, y, z, dx, dy, dz, heading]
+  // params: mask (N, N/THREADS_PER_BLOCK_NMS)
+
+  const int row_start = blockIdx.y;
+  const int col_start = blockIdx.x;
+
+  // if (row_start > col_start) return;
+
+  const int row_size = fminf(boxes_num - row_start * THREADS_PER_BLOCK_NMS, THREADS_PER_BLOCK_NMS);
+  const int col_size = fminf(boxes_num - col_start * THREADS_PER_BLOCK_NMS, THREADS_PER_BLOCK_NMS);
+
+  __shared__ float block_boxes[THREADS_PER_BLOCK_NMS * kBoxBlockSize];
+
+  if (threadIdx.x < col_size) {
+    block_boxes[threadIdx.x * kBoxBlockSize + 0] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 0];
+    block_boxes[threadIdx.x * kBoxBlockSize + 1] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 1];
+    block_boxes[threadIdx.x * kBoxBlockSize + 2] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 2];
+    block_boxes[threadIdx.x * kBoxBlockSize + 3] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 3];
+    block_boxes[threadIdx.x * kBoxBlockSize + 4] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 4];
+    block_boxes[threadIdx.x * kBoxBlockSize + 5] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 5];
+    block_boxes[threadIdx.x * kBoxBlockSize + 6] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 6];
+  }
+  __syncthreads();
+
+  if (threadIdx.x < row_size) {
+    const int cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x;
+    const float* cur_box = boxes + cur_box_idx * kBoxBlockSize;
+
+    int i = 0;
+    unsigned long long t = 0;
+    int start = 0;
+    if (row_start == col_start) {
+      start = threadIdx.x + 1;
+    }
+    for (i = start; i < col_size; i++) {
+      if (iou_normal(cur_box, block_boxes + i * kBoxBlockSize) > nms_overlap_thresh) {
+        t |= 1ULL << i;
+      }
+    }
+    const int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS);
+    mask[cur_box_idx * col_blocks + col_start] = t;
+  }
+}
+
+__global__ void RotatedNmsWithIndicesKernel(const int boxes_num,
+                                            const float nms_overlap_thresh,
+                                            const float* res_box,
+                                            const int* res_sorted_indices,
+                                            unsigned long long* mask) {
+  const int row_start = blockIdx.y;
+  const int col_start = blockIdx.x;
+
+  const int row_size = fminf(boxes_num - row_start * THREADS_PER_BLOCK_NMS, THREADS_PER_BLOCK_NMS);
+  const int col_size = fminf(boxes_num - col_start * THREADS_PER_BLOCK_NMS, THREADS_PER_BLOCK_NMS);
+
+  __shared__ float block_boxes[THREADS_PER_BLOCK_NMS * kBoxBlockSize];
+
+  if (threadIdx.x < col_size) { // 419, 420有两种情况，分情况讨论
+    const int col_actual_idx = res_sorted_indices[THREADS_PER_BLOCK_NMS * col_start + threadIdx.x];
+    block_boxes[threadIdx.x * kBoxBlockSize + 0] = res_box[col_actual_idx * kBoxBlockSize + 0];
+    block_boxes[threadIdx.x * kBoxBlockSize + 1] = res_box[col_actual_idx * kBoxBlockSize + 1];
+    block_boxes[threadIdx.x * kBoxBlockSize + 2] = res_box[col_actual_idx * kBoxBlockSize + 2];
+    block_boxes[threadIdx.x * kBoxBlockSize + 3] = res_box[col_actual_idx * kBoxBlockSize + 3];
+    block_boxes[threadIdx.x * kBoxBlockSize + 4] = res_box[col_actual_idx * kBoxBlockSize + 4];
+    block_boxes[threadIdx.x * kBoxBlockSize + 5] = res_box[col_actual_idx * kBoxBlockSize + 5];
+    block_boxes[threadIdx.x * kBoxBlockSize + 6] = res_box[col_actual_idx * kBoxBlockSize + 6];
+  }
+  __syncthreads(); // 等待块内的线程
+
+  if (threadIdx.x < row_size) { //
+    const int cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x;
+    const int row_actual_idx = res_sorted_indices[cur_box_idx];
+
+    float cur_box[kBoxBlockSize];
+    cur_box[0] = res_box[row_actual_idx * kBoxBlockSize + 0];
+    cur_box[1] = res_box[row_actual_idx * kBoxBlockSize + 1];
+    cur_box[2] = res_box[row_actual_idx * kBoxBlockSize + 2];
+    cur_box[3] = res_box[row_actual_idx * kBoxBlockSize + 3];
+    cur_box[4] = res_box[row_actual_idx * kBoxBlockSize + 4];
+    cur_box[5] = res_box[row_actual_idx * kBoxBlockSize + 5];
+    cur_box[6] = res_box[row_actual_idx * kBoxBlockSize + 6];
+
+    int i = 0;
+    unsigned long long t = 0;
+    int start = 0;
+    if (row_start == col_start) {
+      start = threadIdx.x + 1;  // 此时不和 [0 ~ threadIdx.x) 的比吗? FIXME
+    }                           // 认为这里是正确的，都不需要和小于自己序号的比，如果col小于row,也不需要比
+    for (i = start; i < col_size; i++) {
+      if (iou_bev(cur_box, block_boxes + i * kBoxBlockSize) > nms_overlap_thresh) {
+        t |= 1ULL << i;
+      }
+    }
+
+    const int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS);
+    // assume cur_box_idx = 21, col_start = 0, row_start = 0 , threadIdx = 21,
+    // mark 21 th box and top 64 boxes
+    mask[cur_box_idx * col_blocks + col_start] = t;
+    // 也可以 col_start * boxes_num + cur_box_idx 
+  }
+}
+
+__global__ void box_assign_kernel(float* reg, float* height, float* dim,
+                                  float* rot, float* boxes, float* score,
+                                  int* label, float* out_score, int* out_label,
+                                  int* validIndexs, int head_x_size,
+                                  int head_y_size) {
+  int boxId = blockIdx.x;
+  int channel = threadIdx.x;
+  int idx = validIndexs[boxId];
+  if (channel == 0) { boxes[boxId * kBoxBlockSize + 0] = reg[idx]; } 
+  else if (channel == 1) { boxes[boxId * kBoxBlockSize + 1] = reg[idx + head_x_size * head_y_size]; } 
+  else if (channel == 2) { boxes[boxId * kBoxBlockSize + 2] = height[idx]; } 
+  else if (channel == 3) { boxes[boxId * kBoxBlockSize + 3] = dim[idx]; } 
+  else if (channel == 4) { boxes[boxId * kBoxBlockSize + 4] = dim[idx + head_x_size * head_y_size]; } 
+  else if (channel == 5) { boxes[boxId * kBoxBlockSize + 5] = dim[idx + 2 * head_x_size * head_y_size]; } 
+  else if (channel == 6) {
+    float theta = atan2f(rot[0 * head_x_size * head_y_size + idx],
+                         rot[1 * head_x_size * head_y_size + idx]);
+    theta = -theta - 3.1415926 / 2;
+    boxes[boxId * kBoxBlockSize + 6] = theta;
+  }
+  // else if(channel == 7)
+  // out_score[boxId] = score[idx];
+  else if (channel == 8) { out_label[boxId] = label[idx]; }
+}
+
+void box_assign_launcher(float* reg, float* height, float* dim, float* rot,
+                         float* boxes, float* score, int* label,
+                         float* out_score, int* out_label, int* validIndexs,
+                         int boxSize, int head_x_size, int head_y_size) {
+  box_assign_kernel<<<boxSize, 9>>>(reg, height, dim, rot, boxes, score, label,
+                                    out_score, out_label, validIndexs, head_x_size,
+                                    head_y_size);
+}
+
+__global__ void index_assign(int* indexs) {
+  int yIdx = blockIdx.x;
+  int xIdx = threadIdx.x;
+  int idx = yIdx * blockDim.x + xIdx;
+  indexs[idx] = idx;
+}
+
+void index_assign_launcher(int* indexs, int head_x_size, int head_y_size) {
+  index_assign<<<head_x_size, head_y_size>>>(indexs);
+}
+
+void boxes_overlap_launcher(const int num_a, const float* boxes_a,
+                            const int num_b, const float* boxes_b,
+                            float* ans_overlap) {
+  dim3 blocks(
+      DIVUP(num_b, THREADS_PER_BLOCK),
+      DIVUP(num_a, THREADS_PER_BLOCK));  // blockIdx.x(col), blockIdx.y(row)
+  dim3 threads(THREADS_PER_BLOCK, THREADS_PER_BLOCK);
+
+  boxes_overlap_kernel<<<blocks, threads>>>(num_a, boxes_a, num_b, boxes_b,
+                                            ans_overlap);
+#ifdef DEBUG
+  cudaDeviceSynchronize();  // for using printf in kernel function
+#endif
+}
+
+void boxes_iou_bev_launcher(const int num_a, const float* boxes_a,
+                            const int num_b, const float* boxes_b,
+                            float* ans_iou) {
+  dim3 blocks(
+      DIVUP(num_b, THREADS_PER_BLOCK),
+      DIVUP(num_a, THREADS_PER_BLOCK));  // blockIdx.x(col), blockIdx.y(row)
+  dim3 threads(THREADS_PER_BLOCK, THREADS_PER_BLOCK);
+
+  boxes_iou_bev_kernel<<<blocks, threads>>>(num_a, boxes_a, num_b, boxes_b,
+                                            ans_iou);
+#ifdef DEBUG
+  cudaDeviceSynchronize();  // for using printf in kernel function
+#endif
+}
+
+void nms_launcher(const float* boxes, unsigned long long* mask, int boxes_num,
+                  float nms_overlap_thresh) {
+  dim3 blocks(DIVUP(boxes_num, THREADS_PER_BLOCK_NMS),
+              DIVUP(boxes_num, THREADS_PER_BLOCK_NMS));
+  dim3 threads(THREADS_PER_BLOCK_NMS);
+
+  RotatedNmsKernel<<<blocks, threads>>>(boxes_num, nms_overlap_thresh, boxes, mask);
+}
+
+void nms_normal_launcher(const float* boxes, unsigned long long* mask,
+                         int boxes_num, float nms_overlap_thresh) {
+  dim3 blocks(DIVUP(boxes_num, THREADS_PER_BLOCK_NMS),
+              DIVUP(boxes_num, THREADS_PER_BLOCK_NMS));
+  dim3 threads(THREADS_PER_BLOCK_NMS);
+  NmsKernel<<<blocks, threads>>>(boxes_num, nms_overlap_thresh, boxes,
+                                         mask);
+}
+
+
+Iou3dNmsCuda::Iou3dNmsCuda(const int head_x_size, 
+                           const int head_y_size, 
+                           const float nms_overlap_thresh)
+    : head_x_size_(head_x_size),
+      head_y_size_(head_y_size),
+      nms_overlap_thresh_(nms_overlap_thresh) {}
+
+int Iou3dNmsCuda::DoIou3dNms(const int box_num_pre,
+                             const float* res_box,
+                             const int* res_sorted_indices,
+                             long* host_keep_data) {
+  const int col_blocks = DIVUP(box_num_pre, THREADS_PER_BLOCK_NMS);      // 划分多少块
+  // printf("box_num_pre=%d, col_blocks=%d\n", box_num_pre, col_blocks);
+  unsigned long long* dev_mask = NULL;
+  cudaMalloc((void**)&dev_mask, box_num_pre * col_blocks * sizeof(unsigned long long));  // 
+  // THREADS_PER_BLOCK_NMS 掩码的长度
+  dim3 blocks(DIVUP(box_num_pre, THREADS_PER_BLOCK_NMS),
+              DIVUP(box_num_pre, THREADS_PER_BLOCK_NMS));
+  dim3 threads(THREADS_PER_BLOCK_NMS);
+  RotatedNmsWithIndicesKernel<<<blocks, threads>>>(box_num_pre,
+                                                   nms_overlap_thresh_,
+                                                   res_box,
+                                                   res_sorted_indices,
+                                                   dev_mask);
+
+  unsigned long long host_mask[box_num_pre * col_blocks];
+  cudaMemcpy(host_mask, dev_mask, box_num_pre * col_blocks * sizeof(unsigned long long), cudaMemcpyDeviceToHost);
+  cudaFree(dev_mask);
+
+  unsigned long long host_remv[col_blocks];
+  memset(host_remv, 0, col_blocks * sizeof(unsigned long long));
+  int num_to_keep = 0;
+  for (int i = 0; i < box_num_pre; i++) {
+    int nblock = i / THREADS_PER_BLOCK_NMS;
+    int inblock = i % THREADS_PER_BLOCK_NMS;
+    if (!(host_remv[nblock] & (1ULL << inblock))) { // 满足此要求说明和前面选中的所有box都不冲突
+      host_keep_data[num_to_keep++] = i;           // 把满足的box加入
+      for (int j = nblock; j < col_blocks; j++) { // 把和序号i的box，overlap超过阈值的都考虑上
+        host_remv[j] |= host_mask[i * col_blocks + j];
+      }
+    }
+  }
+
+  if (cudaSuccess != cudaGetLastError()) printf("Error!\n");
+  return num_to_keep;
+}
diff --git a/perception/tensorrt_bevdet/src/nvjpegdecoder.cpp b/perception/tensorrt_bevdet/src/nvjpegdecoder.cpp
new file mode 100644
index 0000000000000..2d889e8539cb8
--- /dev/null
+++ b/perception/tensorrt_bevdet/src/nvjpegdecoder.cpp
@@ -0,0 +1,268 @@
+#ifdef __HAVE_NVJPEG__
+
+#include <chrono>
+#include "nvjpegdecoder.hpp"
+
+using std::chrono::high_resolution_clock;
+using std::chrono::duration;
+
+int dev_malloc(void **p, size_t s) { return (int)cudaMalloc(p, s); }
+
+int dev_free(void *p) { return (int)cudaFree(p); }
+
+int host_malloc(void **p, size_t s, unsigned int f) { return (int)cudaHostAlloc(p, s, f); }
+
+int host_free(void *p) { return (int)cudaFreeHost(p); }
+
+
+int prepare_buffers(const std::vector<std::vector<char>> &file_data, 
+                    std::vector<int> &img_width, std::vector<int> &img_height,
+                    std::vector<nvjpegImage_t> &ibuf, std::vector<nvjpegImage_t> &isz, 
+                    share_params &share_param) {
+    int widths[NVJPEG_MAX_COMPONENT];
+    int heights[NVJPEG_MAX_COMPONENT];
+    int channels;
+    nvjpegChromaSubsampling_t subsampling;
+
+    for (size_t i = 0; i < file_data.size(); i++) {
+        CHECK_NVJPEG(nvjpegGetImageInfo(
+            share_param.nvjpeg_handle, (uchar *)file_data[i].data(), file_data[i].size(),
+            &channels, &subsampling, widths, heights));
+
+        int mul = 1;
+        // in the case of interleaved RGB output, write only to single channel, but
+        // 3 samples at once
+        if (share_param.fmt == NVJPEG_OUTPUT_RGBI || share_param.fmt == NVJPEG_OUTPUT_BGRI) {
+            channels = 1;
+            mul = 3;
+        }
+        // in the case of rgb create 3 buffers with sizes of original image
+        else if (share_param.fmt == NVJPEG_OUTPUT_RGB || share_param.fmt == NVJPEG_OUTPUT_BGR) {
+            channels = 3;
+            widths[1] = widths[2] = widths[0];
+            heights[1] = heights[2] = heights[0];
+        }
+
+        if(img_width[i] != widths[0] || img_height[i] != heights[0]){
+            img_width[i] = widths[0];
+            img_height[i] = heights[0];
+            // realloc output buffer if required
+            for (int c = 0; c < channels; c++) {
+                int aw = mul * widths[c];
+                int ah = heights[c];
+                size_t sz = aw * ah;
+                ibuf[i].pitch[c] = aw;
+                if (sz > isz[i].pitch[c]) {
+                    if (ibuf[i].channel[c]) {
+                        CHECK_CUDA(cudaFree(ibuf[i].channel[c]));
+                    }
+                    CHECK_CUDA(cudaMalloc((void **)&ibuf[i].channel[c], sz));
+                    isz[i].pitch[c] = sz;
+                }
+            }
+        }
+    }
+    return EXIT_SUCCESS;
+}
+
+void create_decoupled_api_handles(std::vector<decode_params_t> &params, share_params &share_param){
+
+    for(size_t i = 0; i < params.size(); i++){
+        CHECK_NVJPEG(nvjpegDecoderCreate(share_param.nvjpeg_handle, NVJPEG_BACKEND_DEFAULT, &params[i].nvjpeg_decoder));
+        CHECK_NVJPEG(nvjpegDecoderStateCreate(share_param.nvjpeg_handle, params[i].nvjpeg_decoder, &params[i].nvjpeg_decoupled_state));
+
+        CHECK_NVJPEG(nvjpegBufferPinnedCreate(share_param.nvjpeg_handle, NULL, &params[i].pinned_buffers[0]));
+        CHECK_NVJPEG(nvjpegBufferPinnedCreate(share_param.nvjpeg_handle, NULL, &params[i].pinned_buffers[1]));
+        CHECK_NVJPEG(nvjpegBufferDeviceCreate(share_param.nvjpeg_handle, NULL, &params[i].device_buffer));
+
+        CHECK_NVJPEG(nvjpegJpegStreamCreate(share_param.nvjpeg_handle, &params[i].jpeg_streams[0]));
+        CHECK_NVJPEG(nvjpegJpegStreamCreate(share_param.nvjpeg_handle, &params[i].jpeg_streams[1]));
+
+
+        CHECK_NVJPEG(nvjpegDecodeParamsCreate(share_param.nvjpeg_handle, &params[i].nvjpeg_decode_params));
+    }
+}
+
+void destroy_decoupled_api_handles(std::vector<decode_params_t> &params, share_params &share_param)
+{
+    for(size_t i = 0; i < params.size(); i++){
+        CHECK_NVJPEG(nvjpegDecodeParamsDestroy(params[i].nvjpeg_decode_params));
+
+        CHECK_NVJPEG(nvjpegJpegStreamDestroy(params[i].jpeg_streams[0]));
+        CHECK_NVJPEG(nvjpegJpegStreamDestroy(params[i].jpeg_streams[1]));
+        CHECK_NVJPEG(nvjpegBufferPinnedDestroy(params[i].pinned_buffers[0]));
+        CHECK_NVJPEG(nvjpegBufferPinnedDestroy(params[i].pinned_buffers[1]));
+        CHECK_NVJPEG(nvjpegBufferDeviceDestroy(params[i].device_buffer));
+
+        CHECK_NVJPEG(nvjpegJpegStateDestroy(params[i].nvjpeg_decoupled_state));
+        CHECK_NVJPEG(nvjpegDecoderDestroy(params[i].nvjpeg_decoder));
+    }
+}
+
+void release_buffers(std::vector<nvjpegImage_t> &ibuf){   
+    for(size_t i = 0; i < ibuf.size(); i++){
+        for (size_t c = 0; c < NVJPEG_MAX_COMPONENT; c++)
+            if (ibuf[i].channel[c])
+                CHECK_CUDA(cudaFree(ibuf[i].channel[c]));
+    }
+}
+
+int get_img(const uchar *d_chanR, int pitchR, 
+            const uchar *d_chanG, int pitchG,
+            const uchar *d_chanB, int pitchB, 
+            size_t width, size_t height, uchar* img){
+    
+    size_t step = width * height;
+
+    CHECK_CUDA(cudaMemcpy2D(img + 0 * step, (size_t)width, d_chanR, (size_t)pitchR,
+                               width, height, cudaMemcpyDeviceToDevice));
+    CHECK_CUDA(cudaMemcpy2D(img + 1 * step, (size_t)width, d_chanG, (size_t)pitchG,
+                               width, height, cudaMemcpyDeviceToDevice));
+    CHECK_CUDA(cudaMemcpy2D(img + 2 * step, (size_t)width, d_chanB, (size_t)pitchB,
+                               width, height, cudaMemcpyDeviceToDevice));
+
+    return EXIT_SUCCESS;
+}
+
+
+
+void decode_single_image(const std::vector<char> &img_data, nvjpegImage_t &out, decode_params_t &params, 
+                                                                share_params &share_param, double &time){
+
+    CHECK_CUDA(cudaStreamSynchronize(params.stream));
+    cudaEvent_t startEvent = NULL, stopEvent = NULL;
+    float loopTime = 0;
+    // cudaEventBlockingSync
+    CHECK_CUDA(cudaEventCreate(&startEvent));
+    CHECK_CUDA(cudaEventCreate(&stopEvent));
+
+    CHECK_CUDA(cudaEventRecord(startEvent, params.stream));
+
+
+    CHECK_NVJPEG(nvjpegStateAttachDeviceBuffer(params.nvjpeg_decoupled_state, params.device_buffer));
+    int buffer_index = 0;
+    CHECK_NVJPEG(nvjpegDecodeParamsSetOutputFormat(params.nvjpeg_decode_params, share_param.fmt));
+
+    CHECK_NVJPEG(nvjpegJpegStreamParse(share_param.nvjpeg_handle, 
+                (const uchar *)img_data.data(),  img_data.size(), 0, 0, 
+                params.jpeg_streams[buffer_index]));
+
+    CHECK_NVJPEG(nvjpegStateAttachPinnedBuffer(params.nvjpeg_decoupled_state,
+                                                params.pinned_buffers[buffer_index]));
+
+    CHECK_NVJPEG(nvjpegDecodeJpegHost(share_param.nvjpeg_handle, params.nvjpeg_decoder, 
+                params.nvjpeg_decoupled_state,  params.nvjpeg_decode_params, 
+                 params.jpeg_streams[buffer_index]));
+
+    CHECK_CUDA(cudaStreamSynchronize(params.stream));
+
+    CHECK_NVJPEG(nvjpegDecodeJpegTransferToDevice(share_param.nvjpeg_handle, 
+                    params.nvjpeg_decoder, params.nvjpeg_decoupled_state,
+                    params.jpeg_streams[buffer_index], params.stream));
+
+    buffer_index = 1 - buffer_index; // switch pinned buffer in pipeline mode to avoid an extra sync
+
+    CHECK_NVJPEG(nvjpegDecodeJpegDevice(share_param.nvjpeg_handle, params.nvjpeg_decoder, 
+                    params.nvjpeg_decoupled_state, &out, params.stream));
+    
+    
+    // CHECK_CUDA(cudaStreamSynchronize(params.stream)); //TODO new add
+
+    CHECK_CUDA(cudaEventRecord(stopEvent, params.stream));
+
+    CHECK_CUDA(cudaEventSynchronize(stopEvent));
+    CHECK_CUDA(cudaEventElapsedTime(&loopTime, startEvent, stopEvent));
+    time = 0.001 * static_cast<double>(loopTime); // cudaEventElapsedTime returns milliseconds
+    // time = 1.0;
+}
+
+
+nvjpegDecoder::nvjpegDecoder(size_t n, size_t _fmt) : N_img(n), iout(n), isz(n), widths(n), heights(n), params(n), share_param(_fmt){
+    init();
+}
+
+
+int nvjpegDecoder::decode(const std::vector<std::vector<char>> &files_data, uchar* out_imgs){    
+
+    for(size_t i = 0; i < params.size(); i++){
+        CHECK_CUDA(cudaStreamCreate(&params[i].stream));
+    }
+    if (prepare_buffers(files_data, widths, heights, iout, isz, share_param))
+        return EXIT_FAILURE;
+
+    double total_time = 0;
+    double times[6];
+
+    auto decode_start = high_resolution_clock::now();
+    std::vector<std::thread> threads(files_data.size());
+    for(size_t i = 0; i < files_data.size(); i++){
+        threads[i] = std::thread(decode_single_image, std::ref(files_data[i]), std::ref(iout[i]), std::ref(params[i]), std::ref(share_param), std::ref(times[i]));
+    }
+    for(size_t i = 0; i < files_data.size(); i++){
+        threads[i].join();
+        total_time += times[i];
+    }
+    auto decode_end = high_resolution_clock::now();
+    duration<double> decode_time = decode_end - decode_start;
+    
+    printf("Decode total time : %.4lf ms\n", decode_time.count() * 1000);
+
+    for(size_t i = 0; i < files_data.size(); i++){
+        get_img(iout[i].channel[0], iout[i].pitch[0],
+                iout[i].channel[1], iout[i].pitch[1], 
+                iout[i].channel[2], iout[i].pitch[2], 
+                widths[i], heights[i], out_imgs + i * 3 * widths[i] * heights[i]);
+    }
+
+    
+    for(size_t i = 0; i < params.size(); i++){
+        CHECK_CUDA(cudaStreamDestroy(params[i].stream));
+    }
+    return EXIT_SUCCESS;
+}
+
+
+
+int nvjpegDecoder::init(){
+  
+    nvjpegDevAllocator_t dev_allocator = {&dev_malloc, &dev_free};
+    nvjpegPinnedAllocator_t pinned_allocator = {&host_malloc, &host_free};
+
+    nvjpegStatus_t status = nvjpegCreateEx(NVJPEG_BACKEND_HARDWARE, &dev_allocator,
+                                           &pinned_allocator, NVJPEG_FLAGS_DEFAULT, 
+                                           &share_param.nvjpeg_handle); 
+    share_param.hw_decode_available = true;
+    if (status == NVJPEG_STATUS_ARCH_MISMATCH){
+        std::cout << "Hardware Decoder not supported. Falling back to default backend" << std::endl;
+        CHECK_NVJPEG(nvjpegCreateEx(NVJPEG_BACKEND_DEFAULT, &dev_allocator, &pinned_allocator, 
+                                    NVJPEG_FLAGS_DEFAULT, &share_param.nvjpeg_handle));
+        share_param.hw_decode_available = false;
+    }
+    else{
+        CHECK_NVJPEG(status);
+    }
+
+    CHECK_NVJPEG(nvjpegJpegStateCreate(share_param.nvjpeg_handle, &share_param.nvjpeg_state));
+
+    create_decoupled_api_handles(params, share_param);
+
+    for(size_t i = 0; i < iout.size(); i++){
+        for (size_t c = 0; c < NVJPEG_MAX_COMPONENT; c++){
+            iout[i].channel[c] = NULL;
+            iout[i].pitch[c] = 0;
+            isz[i].pitch[c] = 0;
+        }
+    }
+    return EXIT_SUCCESS;
+}
+
+
+nvjpegDecoder::~nvjpegDecoder(){ 
+    release_buffers(iout);
+    destroy_decoupled_api_handles(params, share_param);
+    CHECK_NVJPEG(nvjpegJpegStateDestroy(share_param.nvjpeg_state));
+    CHECK_NVJPEG(nvjpegDestroy(share_param.nvjpeg_handle));
+}
+
+
+#endif
\ No newline at end of file
diff --git a/perception/tensorrt_bevdet/src/postprocess.cu b/perception/tensorrt_bevdet/src/postprocess.cu
new file mode 100644
index 0000000000000..d2be6b01dfbe8
--- /dev/null
+++ b/perception/tensorrt_bevdet/src/postprocess.cu
@@ -0,0 +1,223 @@
+#include <thrust/sequence.h>
+#include <thrust/sort.h>
+#include <thrust/transform.h>
+#include <thrust/host_vector.h>
+#include <thrust/device_vector.h>
+
+#include "postprocess.hpp"
+
+
+__device__ float sigmoid_gpu(const float x) { return 1.0f / (1.0f + expf(-x)); }
+
+__global__ void BEVDecodeObjectKernel(const int map_size,         // 40000
+                                   const float score_thresh,   // 0.1
+                                //    const int nms_pre_max_size, // 4096
+                                   const float x_start,
+                                   const float y_start,
+                                   const float x_step,
+                                   const float y_step,
+                                   const int output_h,
+                                   const int output_w,
+                                   const int downsample_size,
+                                   const int num_class_in_task,
+                                   const int cls_range,
+                                   const float* reg,
+                                   const float* hei,
+                                   const float* dim,
+                                   const float* rot,
+                                   const float* vel,
+                                   const float* cls,
+                                   float* res_box,
+                                   float* res_conf,
+                                   int* res_cls,
+                                   int* res_box_num,
+                                   float* rescale_factor){  // 根据置信度，初筛，筛选后有res_box_num个box，不超过nms_pre_max_size 4096
+    int idx = threadIdx.x + blockDim.x * blockIdx.x;
+    if (idx >= map_size) return;
+
+    float max_score = cls[idx]; // 初始化为task的第一个类
+    int label = cls_range;      // 初始化为task的第一个类
+    for (int i = 1; i < num_class_in_task; ++i) {
+        float cur_score = cls[idx + i * map_size];
+        if (cur_score > max_score){
+            max_score = cur_score;
+            label = i + cls_range;
+        }
+    }
+
+    int coor_x = idx % output_h;  //
+    int coor_y = idx / output_w;  //
+
+    float conf = sigmoid_gpu(max_score); // 计算置信度
+    if (conf > score_thresh){
+        int cur_valid_box_id = atomicAdd(res_box_num, 1);
+        res_box[cur_valid_box_id * kBoxBlockSize + 0] = 
+            (reg[idx + 0 * map_size] + coor_x) * x_step + x_start;
+        res_box[cur_valid_box_id * kBoxBlockSize + 1] = 
+            (reg[idx + 1 * map_size] + coor_y) * y_step + y_start;
+        res_box[cur_valid_box_id * kBoxBlockSize + 2] = hei[idx];
+        res_box[cur_valid_box_id * kBoxBlockSize + 3] = 
+                                expf(dim[idx + 0 * map_size]) * rescale_factor[label]; // nms scale
+        res_box[cur_valid_box_id * kBoxBlockSize + 4] = 
+                                expf(dim[idx + 1 * map_size]) * rescale_factor[label];
+        res_box[cur_valid_box_id * kBoxBlockSize + 5] = 
+                                expf(dim[idx + 2 * map_size]) * rescale_factor[label];
+        res_box[cur_valid_box_id * kBoxBlockSize + 6] = atan2f(rot[idx], rot[idx + map_size]);
+        res_box[cur_valid_box_id * kBoxBlockSize + 7] = vel[idx];
+        res_box[cur_valid_box_id * kBoxBlockSize + 8] = vel[idx + map_size];
+
+
+        res_conf[cur_valid_box_id] = conf;
+        res_cls[cur_valid_box_id] = label;
+    }
+}
+
+PostprocessGPU::PostprocessGPU(const int _class_num, 
+                               const float _score_thresh,
+                               const float _nms_thresh, 
+                               const int _nms_pre_maxnum,
+                               const int _nms_post_maxnum, 
+                               const int _down_sample, 
+                               const int _output_h, 
+                               const int _output_w, 
+                               const float _x_step, 
+                               const float _y_step,
+                               const float _x_start, 
+                               const float _y_start,
+                               const std::vector<int>& _class_num_pre_task,
+                               const std::vector<float>& _nms_rescale_factor) :
+                               class_num(_class_num),
+                               score_thresh(_score_thresh),
+                               nms_thresh(_nms_thresh), 
+                               nms_pre_maxnum(_nms_pre_maxnum),
+                               nms_post_maxnum(_nms_post_maxnum), 
+                               down_sample(_down_sample),
+                               output_h(_output_h), 
+                               output_w(_output_w), 
+                               x_step(_x_step),
+                               y_step(_y_step), 
+                               x_start(_x_start), 
+                               y_start(_y_start),
+                               map_size(output_h * output_w),
+                               class_num_pre_task(_class_num_pre_task),
+                               nms_rescale_factor(_nms_rescale_factor),
+                               task_num(_class_num_pre_task.size()){
+
+    CHECK_CUDA(cudaMalloc((void**)&boxes_dev, kBoxBlockSize * map_size * sizeof(float)));
+    CHECK_CUDA(cudaMalloc((void**)&score_dev, map_size * sizeof(float)));
+    CHECK_CUDA(cudaMalloc((void**)&cls_dev, map_size * sizeof(int)));
+    CHECK_CUDA(cudaMalloc((void**)&sorted_indices_dev, map_size * sizeof(int)));
+    CHECK_CUDA(cudaMalloc((void**)&valid_box_num, sizeof(int)));
+    CHECK_CUDA(cudaMalloc((void**)&nms_rescale_factor_dev, class_num * sizeof(float)));
+
+    CHECK_CUDA(cudaMallocHost((void**)&boxes_host, kBoxBlockSize * map_size * sizeof(float)));
+    CHECK_CUDA(cudaMallocHost((void**)&score_host, nms_pre_maxnum * sizeof(float)));
+    CHECK_CUDA(cudaMallocHost((void**)&cls_host, map_size * sizeof(float)));
+    CHECK_CUDA(cudaMallocHost((void**)&sorted_indices_host, nms_pre_maxnum * sizeof(int)));
+    CHECK_CUDA(cudaMallocHost((void**)&keep_data_host, nms_pre_maxnum * sizeof(long)));
+
+    CHECK_CUDA(cudaMemcpy(nms_rescale_factor_dev, nms_rescale_factor.data(),
+                                        class_num * sizeof(float), cudaMemcpyHostToDevice));
+
+    iou3d_nms.reset(new Iou3dNmsCuda(output_h, output_w, nms_thresh));
+
+
+    for(auto i = 0; i < nms_rescale_factor.size(); i++){
+        printf("%.2f%c", nms_rescale_factor[i], i == nms_rescale_factor.size() - 1 ? '\n' : ' ');
+    }
+
+}
+PostprocessGPU::~PostprocessGPU(){
+    CHECK_CUDA(cudaFree(boxes_dev));
+    CHECK_CUDA(cudaFree(score_dev));
+    CHECK_CUDA(cudaFree(cls_dev));
+    CHECK_CUDA(cudaFree(sorted_indices_dev));
+    CHECK_CUDA(cudaFree(valid_box_num));
+    CHECK_CUDA(cudaFree(nms_rescale_factor_dev));
+
+    CHECK_CUDA(cudaFreeHost(boxes_host));
+    CHECK_CUDA(cudaFreeHost(score_host));
+    CHECK_CUDA(cudaFreeHost(cls_host));
+    CHECK_CUDA(cudaFreeHost(sorted_indices_host));
+    CHECK_CUDA(cudaFreeHost(keep_data_host));
+}
+
+
+void PostprocessGPU::DoPostprocess(void ** const bev_buffer, std::vector<Box>& out_detections){
+
+    // bev_buffer : BEV_feat, reg_0, hei_0, dim_0, rot_0, vel_0, heatmap_0, reg_1 ...
+    const int task_num = class_num_pre_task.size();
+    int cur_start_label = 0;
+    for(int i = 0; i < task_num; i++){
+        float* reg = (float*)bev_buffer[i * 6 + 0];     // 2 x 128 x 128
+        float* hei = (float*)bev_buffer[i * 6 + 1];     // 1 x 128 x 128
+        float* dim = (float*)bev_buffer[i * 6 + 2];     // 3 x 128 x 128
+        float* rot = (float*)bev_buffer[i * 6 + 3];     // 2 x 128 x 128
+        float* vel = (float*)bev_buffer[i * 6 + 4];     // 2 x 128 x 128
+        float* heatmap = (float*)bev_buffer[i * 6 + 5]; // c x 128 x 128
+
+        dim3 grid(DIVUP(map_size, NUM_THREADS));
+        CHECK_CUDA(cudaMemset(valid_box_num, 0, sizeof(int)));
+        BEVDecodeObjectKernel<<<grid, NUM_THREADS>>>(map_size, score_thresh, 
+                                         x_start, y_start, x_step, y_step, output_h,
+                                         output_w, down_sample, class_num_pre_task[i],
+                                         cur_start_label, reg, hei, dim, rot, 
+                                         vel, 
+                                         heatmap,
+                                         boxes_dev, score_dev, cls_dev, valid_box_num,
+                                         nms_rescale_factor_dev);
+
+        /*
+        此时 boxes_dev, score_dev, cls_dev 有 valid_box_num 个元素，可能大于nms_pre_maxnum, 而且是无序排列的
+        */ 
+        int box_num_pre = 0;
+        CHECK_CUDA(cudaMemcpy(&box_num_pre, valid_box_num, sizeof(int), cudaMemcpyDeviceToHost));
+
+        thrust::sequence(thrust::device, sorted_indices_dev, sorted_indices_dev + box_num_pre);
+        thrust::sort_by_key(thrust::device, score_dev, score_dev + box_num_pre, 
+                            sorted_indices_dev, thrust::greater<float>());
+        // 此时 score_dev 是降序排列的，而 sorted_indices_dev 索引着原顺序，
+        // 即 sorted_indices_dev[i] = j; i:现在的位置，j:原索引;  j:[0, map_size)
+
+
+        box_num_pre = std::min(box_num_pre, nms_pre_maxnum);
+
+        int box_num_post = iou3d_nms->DoIou3dNms(box_num_pre, boxes_dev, 
+                                                        sorted_indices_dev, keep_data_host);
+
+        box_num_post = std::min(box_num_post, nms_post_maxnum);
+
+
+        CHECK_CUDA(cudaMemcpy(sorted_indices_host, sorted_indices_dev, box_num_pre * sizeof(int),
+                                                                    cudaMemcpyDeviceToHost));
+        CHECK_CUDA(cudaMemcpy(boxes_host, boxes_dev, kBoxBlockSize * map_size * sizeof(float),
+                                                                    cudaMemcpyDeviceToHost));
+        CHECK_CUDA(cudaMemcpy(score_host, score_dev, box_num_pre * sizeof(float), 
+                                                                    cudaMemcpyDeviceToHost));
+        CHECK_CUDA(cudaMemcpy(cls_host, cls_dev, map_size * sizeof(float), 
+                                                                    cudaMemcpyDeviceToHost));
+
+
+        for (auto j = 0; j < box_num_post; j++) {
+            int k = keep_data_host[j];
+            int idx = sorted_indices_host[k];
+            Box box;
+            box.x = boxes_host[idx * kBoxBlockSize + 0];
+            box.y = boxes_host[idx * kBoxBlockSize + 1];
+            box.z = boxes_host[idx * kBoxBlockSize + 2];
+            box.l = boxes_host[idx * kBoxBlockSize + 3] / nms_rescale_factor[cls_host[idx]];
+            box.w = boxes_host[idx * kBoxBlockSize + 4] / nms_rescale_factor[cls_host[idx]];
+            box.h = boxes_host[idx * kBoxBlockSize + 5] / nms_rescale_factor[cls_host[idx]];
+            box.r = boxes_host[idx * kBoxBlockSize + 6];
+            box.vx = boxes_host[idx * kBoxBlockSize + 7];
+            box.vy = boxes_host[idx * kBoxBlockSize + 8];
+
+            box.label = cls_host[idx];
+            box.score = score_host[k];
+            box.z -= box.h * 0.5; // bottom height
+            out_detections.push_back(box);
+        }
+        
+        cur_start_label += class_num_pre_task[i];
+    }
+}
diff --git a/perception/tensorrt_bevdet/src/preprocess.cu b/perception/tensorrt_bevdet/src/preprocess.cu
new file mode 100644
index 0000000000000..b37a7b5a0e89d
--- /dev/null
+++ b/perception/tensorrt_bevdet/src/preprocess.cu
@@ -0,0 +1,22 @@
+#include "common.hpp"
+#include "preprocess.hpp"
+#include <thrust/fill.h>
+#include <fstream>
+
+__global__ void convert_RGBHWC_to_BGRCHW_kernel(uchar *input, uchar *output, 
+                                                            int channels, int height, int width){
+    int index = blockIdx.x * blockDim.x + threadIdx.x;
+    if(index < channels * height * width){
+        int y = index / 3 / width;
+        int x = index / 3 % width;
+        int c = 2 - index % 3;  // RGB to BGR
+
+        output[c * height * width + y * width + x] = input[index];
+    }
+}
+// RGBHWC to BGRCHW
+void convert_RGBHWC_to_BGRCHW(uchar *input, uchar *output, 
+                                                        int channels, int height, int width){
+    convert_RGBHWC_to_BGRCHW_kernel<<<DIVUP(channels * height * width, NUM_THREADS), NUM_THREADS>>>
+                                                            (input, output, channels, height, width);
+}
\ No newline at end of file
diff --git a/perception/tensorrt_bevdet/src/preprocess_plugin.cu b/perception/tensorrt_bevdet/src/preprocess_plugin.cu
new file mode 100644
index 0000000000000..e23c0af29c2a0
--- /dev/null
+++ b/perception/tensorrt_bevdet/src/preprocess_plugin.cu
@@ -0,0 +1,357 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "preprocess_plugin.hpp"
+#include <cuda_fp16.h>
+#include <cuda_runtime_api.h>
+
+#include "common.hpp"
+#include <iostream>
+
+// kernel for GPU
+
+template<typename T>
+__global__ void preprocess_kernel(const uint8_t * src_dev, 
+                                T* dst_dev, 
+                                int src_row_step, 
+                                int dst_row_step, 
+                                int src_img_step, 
+                                int dst_img_step,
+                                int src_h, 
+                                int src_w, 
+                                float radio_h, 
+                                float radio_w, 
+                                float offset_h, 
+                                float offset_w, 
+                                const float * mean, 
+                                const float * std,
+                                int dst_h,
+                                int dst_w,
+                                int n){
+
+    int idx = blockIdx.x * blockDim.x + threadIdx.x;
+    if(idx >= dst_h * dst_w * n) return;
+    
+    int i = (idx / n) / dst_w;
+    int j = (idx / n) % dst_w;
+    int k = idx % n;
+
+	int pX = (int) roundf((i / radio_h) + offset_h);
+	int pY = (int) roundf((j / radio_w) + offset_w);
+ 
+	if (pX < src_h && pX >= 0 && pY < src_w && pY >= 0){
+        int s1 = k * src_img_step + 0 * src_img_step / 3 + pX * src_row_step + pY;
+        int s2 = k * src_img_step + 1 * src_img_step / 3 + pX * src_row_step + pY;
+        int s3 = k * src_img_step + 2 * src_img_step / 3 + pX * src_row_step + pY;
+
+        int d1 = k * dst_img_step + 0 * dst_img_step / 3 + i * dst_row_step + j;
+        int d2 = k * dst_img_step + 1 * dst_img_step / 3 + i * dst_row_step + j;
+        int d3 = k * dst_img_step + 2 * dst_img_step / 3 + i * dst_row_step + j;
+
+		dst_dev[d1] = (static_cast<T>(src_dev[s1]) - static_cast<T>(mean[0])) / static_cast<T>(std[0]);
+		dst_dev[d2] = (static_cast<T>(src_dev[s2]) - static_cast<T>(mean[1])) / static_cast<T>(std[1]);
+		dst_dev[d3] = (static_cast<T>(src_dev[s3]) - static_cast<T>(mean[2])) / static_cast<T>(std[2]);
+	}
+}
+
+
+namespace nvinfer1
+{
+// class PreprocessPlugin
+PreprocessPlugin::PreprocessPlugin(const std::string &name, int crop_h, int crop_w, float resize_radio):
+    name_(name){
+    m_.crop_h = crop_h;
+    m_.crop_w = crop_w;
+    m_.resize_radio = resize_radio;
+}
+
+PreprocessPlugin::PreprocessPlugin(const std::string &name, const void *buffer, size_t length):
+    name_(name){
+    memcpy(&m_, buffer, sizeof(m_));
+}
+
+PreprocessPlugin::~PreprocessPlugin(){
+}
+
+IPluginV2DynamicExt *PreprocessPlugin::clone() const noexcept {
+    auto p = new PreprocessPlugin(name_, &m_, sizeof(m_));
+    p->setPluginNamespace(namespace_.c_str());
+    return p;
+}
+
+int32_t PreprocessPlugin::getNbOutputs() const noexcept {
+    return 1;
+}
+ 
+DataType PreprocessPlugin::getOutputDataType(int32_t index, DataType const *inputTypes, 
+                                                                int32_t nbInputs) const noexcept {
+    // return DataType::kHALF;
+    return DataType::kFLOAT;
+}
+
+DimsExprs PreprocessPlugin::getOutputDimensions(int32_t outputIndex, const DimsExprs *inputs, 
+                                        int32_t nbInputs, IExprBuilder &exprBuilder) noexcept {
+    int input_h = inputs[0].d[2]->getConstantValue();
+    int input_w = inputs[0].d[3]->getConstantValue() * 4;
+
+    int output_h = input_h * m_.resize_radio - m_.crop_h;
+    int output_w = input_w * m_.resize_radio - m_.crop_w;
+
+    DimsExprs ret;
+    ret.nbDims = inputs[0].nbDims;
+    ret.d[0] = inputs[0].d[0];
+    ret.d[1] = inputs[0].d[1];
+    ret.d[2] =  exprBuilder.constant(output_h);
+    ret.d[3] =  exprBuilder.constant(output_w);
+    
+    return ret; 
+}
+
+bool PreprocessPlugin::supportsFormatCombination(int32_t pos, const PluginTensorDesc *inOut,
+                                                    int32_t nbInputs, int32_t nbOutputs) noexcept {
+    bool res;
+    switch (pos)
+    {
+    case 0: // images
+        res = (inOut[0].type == DataType::kINT8 || inOut[0].type == DataType::kINT32) && 
+                inOut[0].format == TensorFormat::kLINEAR;
+        break;
+    case 1: // mean
+        res = (inOut[1].type == DataType::kFLOAT) &&
+                inOut[1].format == TensorFormat::kLINEAR;
+        break;
+    case 2: // std
+        res = (inOut[2].type == DataType::kFLOAT) &&
+                inOut[2].format == TensorFormat::kLINEAR;
+        break;
+    case 3: // 输出 img tensor
+        res = (inOut[3].type == DataType::kFLOAT || inOut[3].type == DataType::kHALF) && 
+                inOut[3].format == inOut[0].format;
+
+        // res = inOut[3].type == DataType::kHALF && inOut[3].format == inOut[0].format;
+        break;
+    default: 
+        res = false;
+    }
+    return res;
+}
+
+void PreprocessPlugin::configurePlugin(const DynamicPluginTensorDesc *in, int32_t nbInputs, 
+                                    const DynamicPluginTensorDesc *out, int32_t nbOutputs) noexcept {
+    return;
+}
+
+size_t PreprocessPlugin::getWorkspaceSize(const PluginTensorDesc *inputs, int32_t nbInputs, 
+                                const PluginTensorDesc *outputs, int32_t nbOutputs) const noexcept {
+    return 0;
+}
+
+int32_t PreprocessPlugin::enqueue(const PluginTensorDesc *inputDesc, const PluginTensorDesc *outputDesc,
+    const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept {
+    
+    int n_img = inputDesc[0].dims.d[0];
+    int src_img_h = inputDesc[0].dims.d[2];
+    int src_img_w = inputDesc[0].dims.d[3] * 4;
+
+    int dst_img_h = outputDesc[0].dims.d[2];
+    int dst_img_w = outputDesc[0].dims.d[3];
+
+    int src_row_step = src_img_w;
+    int dst_row_step = dst_img_w;
+
+    int src_img_step = src_img_w * src_img_h * 3;
+    int dst_img_step = dst_img_w * dst_img_h * 3;
+
+    float offset_h = m_.crop_h / m_.resize_radio;
+    float offset_w = m_.crop_w / m_.resize_radio;
+
+    dim3 grid(DIVUP(dst_img_h * dst_img_w * n_img,  NUM_THREADS));
+    dim3 block(NUM_THREADS);
+
+    switch (int(outputDesc[0].type))
+    {
+    case int(DataType::kFLOAT):
+        // printf("pre : float\n");
+        preprocess_kernel<<<grid, block, 0, stream>>>(
+                                                reinterpret_cast<const uint8_t *>(inputs[0]),
+                                                reinterpret_cast<float *>(outputs[0]),
+                                                src_row_step, 
+                                                dst_row_step, 
+                                                src_img_step,
+                                                dst_img_step, 
+                                                src_img_h, 
+                                                src_img_w, 
+                                                m_.resize_radio,
+                                                m_.resize_radio, 
+                                                offset_h, 
+                                                offset_w, 
+                                                reinterpret_cast<const float *>(inputs[1]), 
+                                                reinterpret_cast<const float *>(inputs[2]),
+                                                dst_img_h, 
+                                                dst_img_w,
+                                                n_img);
+        break;
+    case int(DataType::kHALF):
+        // printf("pre : half\n");
+        preprocess_kernel<<<grid, block, 0, stream>>>(
+                                                reinterpret_cast<const uint8_t *>(inputs[0]),
+                                                reinterpret_cast<__half *>(outputs[0]),
+                                                src_row_step, 
+                                                dst_row_step, 
+                                                src_img_step,
+                                                dst_img_step, 
+                                                src_img_h, 
+                                                src_img_w, 
+                                                m_.resize_radio,
+                                                m_.resize_radio, 
+                                                offset_h, 
+                                                offset_w, 
+                                                reinterpret_cast<const float *>(inputs[1]), 
+                                                reinterpret_cast<const float *>(inputs[2]),
+                                                dst_img_h, 
+                                                dst_img_w,
+                                                n_img);
+
+        break;
+    default: // should NOT be here
+        printf("\tUnsupport datatype!\n");
+    }
+    return 0;
+}
+
+void PreprocessPlugin::destroy() noexcept {
+    delete this;
+    return;
+}
+
+int32_t PreprocessPlugin::initialize() noexcept {
+    return 0;
+}
+
+void PreprocessPlugin::terminate() noexcept {
+    return;
+}
+
+size_t PreprocessPlugin::getSerializationSize() const noexcept {
+    return sizeof(m_);
+}
+
+void PreprocessPlugin::serialize(void *buffer) const noexcept {
+    memcpy(buffer, &m_, sizeof(m_));
+    return;
+}
+
+void PreprocessPlugin::setPluginNamespace(const char *pluginNamespace) noexcept {
+    namespace_ = pluginNamespace;
+    return;
+}
+
+const char *PreprocessPlugin::getPluginNamespace() const noexcept {
+    return namespace_.c_str();
+}
+
+const char *PreprocessPlugin::getPluginType() const noexcept {
+    return PRE_PLUGIN_NAME;
+}
+
+const char *PreprocessPlugin::getPluginVersion() const noexcept {
+    return PRE_PLUGIN_VERSION;
+}
+
+void PreprocessPlugin::attachToContext(cudnnContext *contextCudnn, cublasContext *contextCublas, 
+                                                        IGpuAllocator *gpuAllocator) noexcept {
+    return;
+}
+
+void PreprocessPlugin::detachFromContext() noexcept {
+    return;
+}
+
+// class PreprocessPluginCreator
+PluginFieldCollection    PreprocessPluginCreator::fc_ {};
+std::vector<PluginField> PreprocessPluginCreator::attr_;
+
+PreprocessPluginCreator::PreprocessPluginCreator() {
+    
+    attr_.clear();
+    attr_.emplace_back(PluginField("crop_h", nullptr, PluginFieldType::kINT32, 1));
+    attr_.emplace_back(PluginField("crop_w", nullptr, PluginFieldType::kINT32, 1));
+    attr_.emplace_back(PluginField("resize_radio", nullptr, PluginFieldType::kFLOAT32, 1));
+
+    fc_.nbFields = attr_.size();
+    fc_.fields   = attr_.data();
+}
+
+PreprocessPluginCreator::~PreprocessPluginCreator() {
+}
+
+
+IPluginV2DynamicExt *PreprocessPluginCreator::createPlugin(const char *name, 
+                                    const PluginFieldCollection *fc) noexcept {
+    
+    const PluginField *fields = fc->fields;
+
+    int crop_h = -1;
+    int crop_w = -1;
+    float resize_radio = 0.f;
+
+    for (int i = 0; i < fc->nbFields; ++i){
+        if(std::string(fc->fields[i].name) == std::string("crop_h")){
+            crop_h = *reinterpret_cast<const int *>(fc->fields[i].data);
+        }
+        else if(std::string(fc->fields[i].name) == std::string("crop_w")){
+            crop_w = *reinterpret_cast<const int *>(fc->fields[i].data);
+        }
+        else if(std::string(fc->fields[i].name) == std::string("resize_radio")){
+            resize_radio = *reinterpret_cast<const float *>(fc->fields[i].data);
+        }
+    }
+    PreprocessPlugin *pObj = new PreprocessPlugin(name, crop_h, crop_w, resize_radio);
+    pObj->setPluginNamespace(namespace_.c_str());
+    return pObj;
+}
+
+IPluginV2DynamicExt *PreprocessPluginCreator::deserializePlugin(const char *name, 
+                                        const void *serialData, size_t serialLength) noexcept {
+    PreprocessPlugin *pObj = new PreprocessPlugin(name, serialData, serialLength);
+    pObj->setPluginNamespace(namespace_.c_str());
+    return pObj;
+}
+
+void PreprocessPluginCreator::setPluginNamespace(const char *pluginNamespace) noexcept {
+    namespace_ = pluginNamespace;
+    return;
+}
+
+const char *PreprocessPluginCreator::getPluginNamespace() const noexcept {
+    return namespace_.c_str();
+}
+
+const char *PreprocessPluginCreator::getPluginName() const noexcept {
+    return PRE_PLUGIN_NAME;
+}
+
+const char *PreprocessPluginCreator::getPluginVersion() const noexcept {
+    return PRE_PLUGIN_VERSION;
+}
+
+const PluginFieldCollection *PreprocessPluginCreator::getFieldNames() noexcept {
+    return &fc_;
+}
+
+REGISTER_TENSORRT_PLUGIN(PreprocessPluginCreator);
+
+} // namespace nvinfer1

From 1e1fd44ac90bc4a90246026d0950d6b2b5061278 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 11 Jul 2024 03:55:29 +0000
Subject: [PATCH 02/85] style(pre-commit): autofix

---
 .../detector/camera_bev_detector.launch.xml   |    4 +-
 perception/tensorrt_bevdet/CMakeLists.txt     |   14 +-
 perception/tensorrt_bevdet/README.md          |   30 +-
 .../tensorrt_bevdet/config/bevdet.param.yaml  |    2 +-
 .../config/bevdet_r50_4dlongterm_depth.yaml   |   91 +-
 .../tensorrt_bevdet/config/cams_param.yaml    |  182 ++-
 .../include/alignbev_plugin.hpp               |  125 +-
 perception/tensorrt_bevdet/include/bevdet.hpp |  399 +++--
 .../tensorrt_bevdet/include/bevdet_node.hpp   |  184 ++-
 .../include/bevpool_plugin.hpp                |  131 +-
 perception/tensorrt_bevdet/include/common.hpp |  106 +-
 .../include/cpu_jpegdecoder.hpp               |   12 +-
 perception/tensorrt_bevdet/include/data.hpp   |  153 +-
 .../include/gatherbev_plugin.hpp              |  124 +-
 .../tensorrt_bevdet/include/iou3d_nms.hpp     |   32 +-
 .../tensorrt_bevdet/include/nvjpegdecoder.hpp |  134 +-
 .../tensorrt_bevdet/include/postprocess.hpp   |  106 +-
 .../tensorrt_bevdet/include/preprocess.hpp    |    9 +-
 .../include/preprocess_plugin.hpp             |  132 +-
 .../launch/tensorrt_bevdet.launch.xml         |   64 +-
 perception/tensorrt_bevdet/package.xml        |    9 +-
 .../tensorrt_bevdet/src/alignbev_plugin.cu    |  474 +++---
 perception/tensorrt_bevdet/src/bevdet.cpp     | 1349 ++++++++---------
 .../tensorrt_bevdet/src/bevdet_node.cpp       |  425 +++---
 .../tensorrt_bevdet/src/bevpool_plugin.cu     |  530 ++++---
 .../tensorrt_bevdet/src/cpu_jpegdecoder.cpp   |  125 +-
 perception/tensorrt_bevdet/src/data.cpp       |  314 ++--
 .../tensorrt_bevdet/src/gatherbev_plugin.cu   |  389 ++---
 perception/tensorrt_bevdet/src/iou3d_nms.cu   |  372 ++---
 .../tensorrt_bevdet/src/nvjpegdecoder.cpp     |  458 +++---
 perception/tensorrt_bevdet/src/postprocess.cu |  393 +++--
 perception/tensorrt_bevdet/src/preprocess.cu  |   31 +-
 .../tensorrt_bevdet/src/preprocess_plugin.cu  |  524 ++++---
 33 files changed, 3703 insertions(+), 3724 deletions(-)

diff --git a/launch/tier4_perception_launch/launch/object_recognition/detection/detector/camera_bev_detector.launch.xml b/launch/tier4_perception_launch/launch/object_recognition/detection/detector/camera_bev_detector.launch.xml
index 716e205e249aa..6bcab50c3527e 100644
--- a/launch/tier4_perception_launch/launch/object_recognition/detection/detector/camera_bev_detector.launch.xml
+++ b/launch/tier4_perception_launch/launch/object_recognition/detection/detector/camera_bev_detector.launch.xml
@@ -23,7 +23,7 @@
   <arg name="number_of_cameras"/>
   <arg name="bev_detection_model" default="bevdet" description="options: `bevdet`"/>
 
-<!-- BEVDet -->
+  <!-- BEVDet -->
   <group if="$(eval &quot;'$(var bev_detection_model)'=='bevdet'&quot;)">
     <push-ros-namespace namespace="bevdet"/>
     <group>
@@ -40,4 +40,4 @@
       </include>
     </group>
   </group>
-</launch>
\ No newline at end of file
+</launch>
diff --git a/perception/tensorrt_bevdet/CMakeLists.txt b/perception/tensorrt_bevdet/CMakeLists.txt
index d50811bce1898..8ce2191771872 100644
--- a/perception/tensorrt_bevdet/CMakeLists.txt
+++ b/perception/tensorrt_bevdet/CMakeLists.txt
@@ -46,10 +46,10 @@ include_directories(
   ${autoware_perception_msgs_INCLUDE_DIRS}
 )
 
-cuda_add_executable(${PROJECT_NAME}_node 
-    src/bevdet_node.cpp 
+cuda_add_executable(${PROJECT_NAME}_node
+    src/bevdet_node.cpp
     src/bevdet.cpp
-    src/preprocess.cu 
+    src/preprocess.cu
     src/iou3d_nms.cu
     src/postprocess.cu
     src/data.cpp
@@ -72,8 +72,8 @@ ament_target_dependencies(${PROJECT_NAME}_node
   "tf2_geometry_msgs"
 )
 
-target_link_libraries(${PROJECT_NAME}_node 
-    yaml-cpp 
+target_link_libraries(${PROJECT_NAME}_node
+    yaml-cpp
     libnvinfer.so
     libnvonnxparser.so
     libz.so
@@ -93,11 +93,11 @@ install(TARGETS ${PROJECT_NAME}_node
   DESTINATION lib/${PROJECT_NAME}
 )
 
-install(DIRECTORY 
+install(DIRECTORY
   launch
   config
   data
   DESTINATION share/${PROJECT_NAME}/
 )
 
-ament_package()
\ No newline at end of file
+ament_package()
diff --git a/perception/tensorrt_bevdet/README.md b/perception/tensorrt_bevdet/README.md
index c537de8d83a5c..a80dcc7a9ac7d 100644
--- a/perception/tensorrt_bevdet/README.md
+++ b/perception/tensorrt_bevdet/README.md
@@ -12,29 +12,29 @@ BEVDet is a BEV perception algorithm based on panoramic cameras. It unifies mult
 
 ### Inputs
 
-| Name                 | Type                            | Description      |
-| -------------------- | ------------------------------- | ---------------- |
-| `~/input/pointcloud` | `sensor_msgs::msg::PointCloud2` | input pointcloud (only used for time alignment and display)|
-| `~/input/topic_img_fl` | `sensor_msgs::msg::Image` | input front_left camera image|
-| `~/input/topic_img_f` | `sensor_msgs::msg::Image` | input front camera image|
-| `~/input/topic_img_fr` | `sensor_msgs::msg::Image` | input front_right camera image|
-| `~/input/topic_img_bl` | `sensor_msgs::msg::Image` | input back_left camera image|
-| `~/input/topic_img_b` | `sensor_msgs::msg::Image` | input back camera image|
-| `~/input/topic_img_br` | `sensor_msgs::msg::Image` | input back_right camera image|
+| Name                   | Type                            | Description                                                 |
+| ---------------------- | ------------------------------- | ----------------------------------------------------------- |
+| `~/input/pointcloud`   | `sensor_msgs::msg::PointCloud2` | input pointcloud (only used for time alignment and display) |
+| `~/input/topic_img_fl` | `sensor_msgs::msg::Image`       | input front_left camera image                               |
+| `~/input/topic_img_f`  | `sensor_msgs::msg::Image`       | input front camera image                                    |
+| `~/input/topic_img_fr` | `sensor_msgs::msg::Image`       | input front_right camera image                              |
+| `~/input/topic_img_bl` | `sensor_msgs::msg::Image`       | input back_left camera image                                |
+| `~/input/topic_img_b`  | `sensor_msgs::msg::Image`       | input back camera image                                     |
+| `~/input/topic_img_br` | `sensor_msgs::msg::Image`       | input back_right camera image                               |
 
 ### Outputs
 
-| Name                       | Type                                             | Description          |
-| -------------------------- | ------------------------------------------------ | -------------------- |
-| `~/output/boxes`         | `autoware_perception_msgs::msg::DetectedObjects` | detected objects     |
-| `~/output/pointcloud`     | `sensor_msgs::msg::PointCloud2`          | output pointcloud (only used for display)    |
+| Name                  | Type                                             | Description                               |
+| --------------------- | ------------------------------------------------ | ----------------------------------------- |
+| `~/output/boxes`      | `autoware_perception_msgs::msg::DetectedObjects` | detected objects                          |
+| `~/output/pointcloud` | `sensor_msgs::msg::PointCloud2`                  | output pointcloud (only used for display) |
 
 ## Limittation
 
-The model is trained on open-source dataset `NuScenes` and has poor generalization on its own dataset,  If you want to use this model to infer your data, you need to retrain it.
+The model is trained on open-source dataset `NuScenes` and has poor generalization on its own dataset, If you want to use this model to infer your data, you need to retrain it.
 
 You can traine models by refer below links.
 
 [BEVDet](https://github.com/HuangJunJie2017/BEVDet/tree/dev2.1)
 
-[BEVDet export onnx](https://github.com/LCH1238/BEVDet/tree/export)
\ No newline at end of file
+[BEVDet export onnx](https://github.com/LCH1238/BEVDet/tree/export)
diff --git a/perception/tensorrt_bevdet/config/bevdet.param.yaml b/perception/tensorrt_bevdet/config/bevdet.param.yaml
index 1176abfda968b..433d9caa97b64 100644
--- a/perception/tensorrt_bevdet/config/bevdet.param.yaml
+++ b/perception/tensorrt_bevdet/config/bevdet.param.yaml
@@ -8,4 +8,4 @@
       post_process_params:
         # post-process params
         score_thre: 0.2
-        class_names: ["car", "truck", "construction_vehicle", "bus", "trailer", "barrier", "motorcycle", "bicycle", "pedestrian", "traffic_cone"]
\ No newline at end of file
+        class_names: ["car", "truck", "construction_vehicle", "bus", "trailer", "barrier", "motorcycle", "bicycle", "pedestrian", "traffic_cone"]
diff --git a/perception/tensorrt_bevdet/config/bevdet_r50_4dlongterm_depth.yaml b/perception/tensorrt_bevdet/config/bevdet_r50_4dlongterm_depth.yaml
index 074e1a1664afb..d9f16eee7a851 100644
--- a/perception/tensorrt_bevdet/config/bevdet_r50_4dlongterm_depth.yaml
+++ b/perception/tensorrt_bevdet/config/bevdet_r50_4dlongterm_depth.yaml
@@ -1,51 +1,62 @@
 bev_range: [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]
 mean: [123.675, 116.28, 103.53]
 std: [58.395, 57.12, 57.375]
-use_depth: true 
+use_depth: true
 use_adj: true
 adj_num: 8
 transform_size: 6
 cam_params_size: 27
-sampling: nearest # nearest # or bicubic 
+sampling: nearest # nearest # or bicubic
 data_config:
-    Ncams: 6
-    cams: [CAM_FRONT_LEFT, CAM_FRONT, CAM_FRONT_RIGHT, CAM_BACK_LEFT, CAM_BACK, CAM_BACK_RIGHT]
-    crop: [140, 0]
-    flip: true
-    input_size: [256, 704]
-    resize: [-0.06, 0.11]
-    resize_test: 0.0
-    rot: [-5.4, 5.4]
-    src_size: [900, 1600]
+  Ncams: 6
+  cams: [CAM_FRONT_LEFT, CAM_FRONT, CAM_FRONT_RIGHT, CAM_BACK_LEFT, CAM_BACK, CAM_BACK_RIGHT]
+  crop: [140, 0]
+  flip: true
+  input_size: [256, 704]
+  resize: [-0.06, 0.11]
+  resize_test: 0.0
+  rot: [-5.4, 5.4]
+  src_size: [900, 1600]
 grid_config:
-    depth: [1.0, 60.0, 0.5]
-    x: [-51.2, 51.2, 0.8]
-    y: [-51.2, 51.2, 0.8]
-    z: [-5, 3, 8]
+  depth: [1.0, 60.0, 0.5]
+  x: [-51.2, 51.2, 0.8]
+  y: [-51.2, 51.2, 0.8]
+  z: [-5, 3, 8]
 model:
-    bevpool_channels: 80
-    coder:
-        code_size: 9
-        max_num: 500
-        post_center_range: [-61.2, -61.2, -10.0, 61.2, 61.2, 10.0]
-        score_threshold: 0.1
-    common_head:
-        channels: [2, 1, 3, 2, 2]
-        names: [reg, height, dim, rot, vel]
-    down_sample: 16
-    tasks:
-    -   class_names: [car, truck, construction_vehicle, bus, trailer, barrier, motorcycle,
-            bicycle, pedestrian, traffic_cone]
-        num_class: 10
-test_cfg:
-    max_per_img: 500
-    max_pool_nms: false
-    min_radius: [4, 12, 10, 1, 0.85, 0.175]
-    nms_rescale_factor:
-    -   [1.0, 0.7, 0.7, 0.4, 0.55, 1.1, 1.0, 1.0, 1.5, 3.5]
-    nms_thr: [0.2]
-    nms_type: [rotate]
-    post_center_limit_range: [-61.2, -61.2, -10.0, 61.2, 61.2, 10.0]
-    post_max_size: 500
-    pre_max_size: 1000
+  bevpool_channels: 80
+  coder:
+    code_size: 9
+    max_num: 500
+    post_center_range: [-61.2, -61.2, -10.0, 61.2, 61.2, 10.0]
     score_threshold: 0.1
+  common_head:
+    channels: [2, 1, 3, 2, 2]
+    names: [reg, height, dim, rot, vel]
+  down_sample: 16
+  tasks:
+    - class_names:
+        [
+          car,
+          truck,
+          construction_vehicle,
+          bus,
+          trailer,
+          barrier,
+          motorcycle,
+          bicycle,
+          pedestrian,
+          traffic_cone,
+        ]
+      num_class: 10
+test_cfg:
+  max_per_img: 500
+  max_pool_nms: false
+  min_radius: [4, 12, 10, 1, 0.85, 0.175]
+  nms_rescale_factor:
+    - [1.0, 0.7, 0.7, 0.4, 0.55, 1.1, 1.0, 1.0, 1.5, 3.5]
+  nms_thr: [0.2]
+  nms_type: [rotate]
+  post_center_limit_range: [-61.2, -61.2, -10.0, 61.2, 61.2, 10.0]
+  post_max_size: 500
+  pre_max_size: 1000
+  score_threshold: 0.1
diff --git a/perception/tensorrt_bevdet/config/cams_param.yaml b/perception/tensorrt_bevdet/config/cams_param.yaml
index 8a58de2a8b651..6a171b5c81493 100644
--- a/perception/tensorrt_bevdet/config/cams_param.yaml
+++ b/perception/tensorrt_bevdet/config/cams_param.yaml
@@ -1,99 +1,93 @@
 cams:
-    CAM_BACK:
-        cam_intrinsic:
-        -   [809.2209905677063, 0.0, 829.2196003259838]
-        -   [0.0, 809.2209905677063, 481.77842384512485]
-        -   [0.0, 0.0, 1.0]
-        data_path: 
-            ./data/nuscenes/samples/CAM_BACK/n015-2018-08-02-17-16-37+0800__CAM_BACK__1533201470437525.jpg
-        sensor2ego_rotation: [0.5037872666382278, -0.49740249788611096, -0.4941850223835201,
-            0.5045496097725578]
-        sensor2ego_translation: [0.0283260309358, 0.00345136761476, 1.57910346144]
-        sensor2lidar_rotation:
-        -   [-0.9999413173368413, 0.009846645553456103, -0.004517239644395279]
-        -   [0.004591280151619832, 0.0075097729192299114, -0.9999612609782784]
-        -   [-0.009812340660088988, -0.9999233205011492, -0.007554540934044881]
-        sensor2lidar_translation: [-0.0036954598058400734, -0.9075747504218441, -0.283221870904093]
-    CAM_BACK_LEFT:
-        cam_intrinsic:
-        -   [1256.7414812095406, 0.0, 792.1125740759628]
-        -   [0.0, 1256.7414812095406, 492.7757465151356]
-        -   [0.0, 0.0, 1.0]
-        data_path: 
-            ./data/nuscenes/samples/CAM_BACK_LEFT/n015-2018-08-02-17-16-37+0800__CAM_BACK_LEFT__1533201470447423.jpg
-        sensor2ego_rotation: [0.6924185592174665, -0.7031619420114925, -0.11648342771943819,
-            0.11203317912370753]
-        sensor2ego_translation: [1.03569100218, 0.484795032713, 1.59097014818]
-        sensor2lidar_rotation:
-        -   [-0.3170637040675469, 0.019895609309549686, -0.9481955348413992]
-        -   [0.9480779188624351, 0.03287371923632269, -0.3163345987226865]
-        -   [0.024877044206231273, -0.9992614689428244, -0.029285651056243037]
-        sensor2lidar_translation: [-0.48313021193087025, 0.09925074956760227, -0.24976867779076528]
-    CAM_BACK_RIGHT:
-        cam_intrinsic:
-        -   [1259.5137405846733, 0.0, 807.2529053838625]
-        -   [0.0, 1259.5137405846733, 501.19579884916527]
-        -   [0.0, 0.0, 1.0]
-        data_path: 
-            ./data/nuscenes/samples/CAM_BACK_RIGHT/n015-2018-08-02-17-16-37+0800__CAM_BACK_RIGHT__1533201470427893.jpg
-        sensor2ego_rotation: [0.12280980120078765, -0.132400842670559, -0.7004305821388234,
-            0.690496031265798]
-        sensor2ego_translation: [1.0148780988, -0.480568219723, 1.56239545128]
-        sensor2lidar_rotation:
-        -   [-0.3567246414755177, -0.005413898071006906, 0.934193887729864]
-        -   [-0.9335307099588551, 0.04018099149456167, -0.3562385457614421]
-        -   [-0.035608197481429044, -0.9991777507681946, -0.01938758989490032]
-        sensor2lidar_translation: [0.482312676691663, 0.0791837770804591, -0.27307179836459383]
-    CAM_FRONT:
-        cam_intrinsic:
-        -   [1266.417203046554, 0.0, 816.2670197447984]
-        -   [0.0, 1266.417203046554, 491.50706579294757]
-        -   [0.0, 0.0, 1.0]
-        data_path: 
-            ./data/nuscenes/samples/CAM_FRONT/n015-2018-08-02-17-16-37+0800__CAM_FRONT__1533201470412460.jpg
-        sensor2ego_rotation: [0.4998015430569128, -0.5030316162024876, 0.4997798114386805,
-            -0.49737083824542755]
-        sensor2ego_translation: [1.70079118954, 0.0159456324149, 1.51095763913]
-        sensor2lidar_rotation:
-        -   [0.9999693728739966, 0.006755601474249302, -0.003951602548949129]
-        -   [0.003824557675651031, 0.0187164545279605, 0.9998175168942008]
-        -   [0.006828328680530752, -0.9998020084889915, 0.018690044109277416]
-        sensor2lidar_translation: [-0.012715806721303125, 0.7688055822719093, -0.31059455973338856]
-    CAM_FRONT_LEFT:
-        cam_intrinsic:
-        -   [1272.5979470598488, 0.0, 826.6154927353808]
-        -   [0.0, 1272.5979470598488, 479.75165386361925]
-        -   [0.0, 0.0, 1.0]
-        data_path: 
-            ./data/nuscenes/samples/CAM_FRONT_LEFT/n015-2018-08-02-17-16-37+0800__CAM_FRONT_LEFT__1533201470404874.jpg
-        sensor2ego_rotation: [0.6757265034669446, -0.6736266522251881, 0.21214015046209478,
-            -0.21122827103904068]
-        sensor2ego_translation: [1.52387798135, 0.494631336551, 1.50932822144]
-        sensor2lidar_rotation:
-        -   [0.5726028043564848, 0.0027092489875872503, -0.8198284505998853]
-        -   [0.8195566437415105, 0.024067561593338945, 0.5724924979229841]
-        -   [0.021282296451183572, -0.9997066632011964, 0.011560770255041255]
-        sensor2lidar_translation: [-0.4917212028847189, 0.5936531098472528, -0.31925386662383204]
-    CAM_FRONT_RIGHT:
-        cam_intrinsic:
-        -   [1260.8474446004698, 0.0, 807.968244525554]
-        -   [0.0, 1260.8474446004698, 495.3344268742088]
-        -   [0.0, 0.0, 1.0]
-        data_path: 
-            ./data/nuscenes/samples/CAM_FRONT_RIGHT/n015-2018-08-02-17-16-37+0800__CAM_FRONT_RIGHT__1533201470420339.jpg
-        sensor2ego_rotation: [0.2060347966337182, -0.2026940577919598, 0.6824507824531167,
-            -0.6713610884174485]
-        sensor2ego_translation: [1.5508477543, -0.493404796419, 1.49574800619]
-        sensor2lidar_rotation:
-        -   [0.5518728006969844, -0.010452325915577761, 0.8338627949092215]
-        -   [-0.8335213795734647, 0.024319671348433384, 0.5519516857293306]
-        -   [-0.0260484480307739, -0.9996495898405882, 0.004709128022186309]
-        sensor2lidar_translation: [0.49650027090206095, 0.6174621492795893, -0.32655958795607276]
-ego2global_rotation: [0.9984303573176436, -0.008635865272570774, 0.0025833156025800875,
-    -0.05527720957189669]
+  CAM_BACK:
+    cam_intrinsic:
+      - [809.2209905677063, 0.0, 829.2196003259838]
+      - [0.0, 809.2209905677063, 481.77842384512485]
+      - [0.0, 0.0, 1.0]
+    data_path: ./data/nuscenes/samples/CAM_BACK/n015-2018-08-02-17-16-37+0800__CAM_BACK__1533201470437525.jpg
+    sensor2ego_rotation:
+      [0.5037872666382278, -0.49740249788611096, -0.4941850223835201, 0.5045496097725578]
+    sensor2ego_translation: [0.0283260309358, 0.00345136761476, 1.57910346144]
+    sensor2lidar_rotation:
+      - [-0.9999413173368413, 0.009846645553456103, -0.004517239644395279]
+      - [0.004591280151619832, 0.0075097729192299114, -0.9999612609782784]
+      - [-0.009812340660088988, -0.9999233205011492, -0.007554540934044881]
+    sensor2lidar_translation: [-0.0036954598058400734, -0.9075747504218441, -0.283221870904093]
+  CAM_BACK_LEFT:
+    cam_intrinsic:
+      - [1256.7414812095406, 0.0, 792.1125740759628]
+      - [0.0, 1256.7414812095406, 492.7757465151356]
+      - [0.0, 0.0, 1.0]
+    data_path: ./data/nuscenes/samples/CAM_BACK_LEFT/n015-2018-08-02-17-16-37+0800__CAM_BACK_LEFT__1533201470447423.jpg
+    sensor2ego_rotation:
+      [0.6924185592174665, -0.7031619420114925, -0.11648342771943819, 0.11203317912370753]
+    sensor2ego_translation: [1.03569100218, 0.484795032713, 1.59097014818]
+    sensor2lidar_rotation:
+      - [-0.3170637040675469, 0.019895609309549686, -0.9481955348413992]
+      - [0.9480779188624351, 0.03287371923632269, -0.3163345987226865]
+      - [0.024877044206231273, -0.9992614689428244, -0.029285651056243037]
+    sensor2lidar_translation: [-0.48313021193087025, 0.09925074956760227, -0.24976867779076528]
+  CAM_BACK_RIGHT:
+    cam_intrinsic:
+      - [1259.5137405846733, 0.0, 807.2529053838625]
+      - [0.0, 1259.5137405846733, 501.19579884916527]
+      - [0.0, 0.0, 1.0]
+    data_path: ./data/nuscenes/samples/CAM_BACK_RIGHT/n015-2018-08-02-17-16-37+0800__CAM_BACK_RIGHT__1533201470427893.jpg
+    sensor2ego_rotation:
+      [0.12280980120078765, -0.132400842670559, -0.7004305821388234, 0.690496031265798]
+    sensor2ego_translation: [1.0148780988, -0.480568219723, 1.56239545128]
+    sensor2lidar_rotation:
+      - [-0.3567246414755177, -0.005413898071006906, 0.934193887729864]
+      - [-0.9335307099588551, 0.04018099149456167, -0.3562385457614421]
+      - [-0.035608197481429044, -0.9991777507681946, -0.01938758989490032]
+    sensor2lidar_translation: [0.482312676691663, 0.0791837770804591, -0.27307179836459383]
+  CAM_FRONT:
+    cam_intrinsic:
+      - [1266.417203046554, 0.0, 816.2670197447984]
+      - [0.0, 1266.417203046554, 491.50706579294757]
+      - [0.0, 0.0, 1.0]
+    data_path: ./data/nuscenes/samples/CAM_FRONT/n015-2018-08-02-17-16-37+0800__CAM_FRONT__1533201470412460.jpg
+    sensor2ego_rotation:
+      [0.4998015430569128, -0.5030316162024876, 0.4997798114386805, -0.49737083824542755]
+    sensor2ego_translation: [1.70079118954, 0.0159456324149, 1.51095763913]
+    sensor2lidar_rotation:
+      - [0.9999693728739966, 0.006755601474249302, -0.003951602548949129]
+      - [0.003824557675651031, 0.0187164545279605, 0.9998175168942008]
+      - [0.006828328680530752, -0.9998020084889915, 0.018690044109277416]
+    sensor2lidar_translation: [-0.012715806721303125, 0.7688055822719093, -0.31059455973338856]
+  CAM_FRONT_LEFT:
+    cam_intrinsic:
+      - [1272.5979470598488, 0.0, 826.6154927353808]
+      - [0.0, 1272.5979470598488, 479.75165386361925]
+      - [0.0, 0.0, 1.0]
+    data_path: ./data/nuscenes/samples/CAM_FRONT_LEFT/n015-2018-08-02-17-16-37+0800__CAM_FRONT_LEFT__1533201470404874.jpg
+    sensor2ego_rotation:
+      [0.6757265034669446, -0.6736266522251881, 0.21214015046209478, -0.21122827103904068]
+    sensor2ego_translation: [1.52387798135, 0.494631336551, 1.50932822144]
+    sensor2lidar_rotation:
+      - [0.5726028043564848, 0.0027092489875872503, -0.8198284505998853]
+      - [0.8195566437415105, 0.024067561593338945, 0.5724924979229841]
+      - [0.021282296451183572, -0.9997066632011964, 0.011560770255041255]
+    sensor2lidar_translation: [-0.4917212028847189, 0.5936531098472528, -0.31925386662383204]
+  CAM_FRONT_RIGHT:
+    cam_intrinsic:
+      - [1260.8474446004698, 0.0, 807.968244525554]
+      - [0.0, 1260.8474446004698, 495.3344268742088]
+      - [0.0, 0.0, 1.0]
+    data_path: ./data/nuscenes/samples/CAM_FRONT_RIGHT/n015-2018-08-02-17-16-37+0800__CAM_FRONT_RIGHT__1533201470420339.jpg
+    sensor2ego_rotation:
+      [0.2060347966337182, -0.2026940577919598, 0.6824507824531167, -0.6713610884174485]
+    sensor2ego_translation: [1.5508477543, -0.493404796419, 1.49574800619]
+    sensor2lidar_rotation:
+      - [0.5518728006969844, -0.010452325915577761, 0.8338627949092215]
+      - [-0.8335213795734647, 0.024319671348433384, 0.5519516857293306]
+      - [-0.0260484480307739, -0.9996495898405882, 0.004709128022186309]
+    sensor2lidar_translation: [0.49650027090206095, 0.6174621492795893, -0.32655958795607276]
+ego2global_rotation:
+  [0.9984303573176436, -0.008635865272570774, 0.0025833156025800875, -0.05527720957189669]
 ego2global_translation: [249.89610931430778, 917.5522573162784, 0.0]
-lidar2ego_rotation: [0.7077955119163518, -0.006492242056004365, 0.010646214713995808,
-    -0.7063073142877817]
+lidar2ego_rotation:
+  [0.7077955119163518, -0.006492242056004365, 0.010646214713995808, -0.7063073142877817]
 lidar2ego_translation: [0.943713, 0.0, 1.84023]
 scene_token: e7ef871f77f44331aefdebc24ec034b7
 timestamp: 1533201470448696
diff --git a/perception/tensorrt_bevdet/include/alignbev_plugin.hpp b/perception/tensorrt_bevdet/include/alignbev_plugin.hpp
index ccdd0e15546d9..a91d43cf6ce28 100644
--- a/perception/tensorrt_bevdet/include/alignbev_plugin.hpp
+++ b/perception/tensorrt_bevdet/include/alignbev_plugin.hpp
@@ -15,85 +15,102 @@
  */
 
 #include <NvInfer.h>
+#include <unistd.h>
+
 #include <cmath>
+#include <iostream>
 #include <string>
 #include <vector>
-#include <unistd.h>
-#include <iostream>
 
 namespace
 {
-static const char *ALIGN_PLUGIN_NAME {"AlignBEV"};
-static const char *ALIGN_PLUGIN_VERSION {"1"};
-} // namespace
+static const char * ALIGN_PLUGIN_NAME{"AlignBEV"};
+static const char * ALIGN_PLUGIN_VERSION{"1"};
+}  // namespace
 
 namespace nvinfer1
 {
 class AlignBEVPlugin : public IPluginV2DynamicExt
 {
 private:
-    const std::string name_;
-    std::string       namespace_;
-    struct
-    {
-    } m_;
+  const std::string name_;
+  std::string namespace_;
+  struct
+  {
+  } m_;
 
 public:
-    AlignBEVPlugin() = delete;
-    AlignBEVPlugin(const std::string &name);
-    AlignBEVPlugin(const std::string &name, const void *buffer, size_t length);
-    ~AlignBEVPlugin();
+  AlignBEVPlugin() = delete;
+  AlignBEVPlugin(const std::string & name);
+  AlignBEVPlugin(const std::string & name, const void * buffer, size_t length);
+  ~AlignBEVPlugin();
 
-    // Method inherited from IPluginV2
-    const char *getPluginType() const noexcept override;
-    const char *getPluginVersion() const noexcept override;
-    int32_t     getNbOutputs() const noexcept override;
-    int32_t     initialize() noexcept override;
-    void        terminate() noexcept override;
-    size_t      getSerializationSize() const noexcept override;
-    void        serialize(void *buffer) const noexcept override;
-    void        destroy() noexcept override;
-    void        setPluginNamespace(const char *pluginNamespace) noexcept override;
-    const char *getPluginNamespace() const noexcept override;
+  // Method inherited from IPluginV2
+  const char * getPluginType() const noexcept override;
+  const char * getPluginVersion() const noexcept override;
+  int32_t getNbOutputs() const noexcept override;
+  int32_t initialize() noexcept override;
+  void terminate() noexcept override;
+  size_t getSerializationSize() const noexcept override;
+  void serialize(void * buffer) const noexcept override;
+  void destroy() noexcept override;
+  void setPluginNamespace(const char * pluginNamespace) noexcept override;
+  const char * getPluginNamespace() const noexcept override;
 
-    // Method inherited from IPluginV2Ext
-    DataType getOutputDataType(int32_t index, DataType const *inputTypes, int32_t nbInputs) const noexcept override;
-    void     attachToContext(cudnnContext *contextCudnn, cublasContext *contextCublas, IGpuAllocator *gpuAllocator) noexcept override;
-    void     detachFromContext() noexcept override;
+  // Method inherited from IPluginV2Ext
+  DataType getOutputDataType(
+    int32_t index, DataType const * inputTypes, int32_t nbInputs) const noexcept override;
+  void attachToContext(
+    cudnnContext * contextCudnn, cublasContext * contextCublas,
+    IGpuAllocator * gpuAllocator) noexcept override;
+  void detachFromContext() noexcept override;
 
-    // Method inherited from IPluginV2DynamicExt
-    IPluginV2DynamicExt *clone() const noexcept override;
-    DimsExprs            getOutputDimensions(int32_t outputIndex, const DimsExprs *inputs, int32_t nbInputs, IExprBuilder &exprBuilder) noexcept override;
-    bool                 supportsFormatCombination(int32_t pos, const PluginTensorDesc *inOut, int32_t nbInputs, int32_t nbOutputs) noexcept override;
-    void                 configurePlugin(const DynamicPluginTensorDesc *in, int32_t nbInputs, const DynamicPluginTensorDesc *out, int32_t nbOutputs) noexcept override;
-    size_t               getWorkspaceSize(const PluginTensorDesc *inputs, int32_t nbInputs, const PluginTensorDesc *outputs, int32_t nbOutputs) const noexcept override;
-    int32_t              enqueue(const PluginTensorDesc *inputDesc, const PluginTensorDesc *outputDesc, const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept override;
+  // Method inherited from IPluginV2DynamicExt
+  IPluginV2DynamicExt * clone() const noexcept override;
+  DimsExprs getOutputDimensions(
+    int32_t outputIndex, const DimsExprs * inputs, int32_t nbInputs,
+    IExprBuilder & exprBuilder) noexcept override;
+  bool supportsFormatCombination(
+    int32_t pos, const PluginTensorDesc * inOut, int32_t nbInputs,
+    int32_t nbOutputs) noexcept override;
+  void configurePlugin(
+    const DynamicPluginTensorDesc * in, int32_t nbInputs, const DynamicPluginTensorDesc * out,
+    int32_t nbOutputs) noexcept override;
+  size_t getWorkspaceSize(
+    const PluginTensorDesc * inputs, int32_t nbInputs, const PluginTensorDesc * outputs,
+    int32_t nbOutputs) const noexcept override;
+  int32_t enqueue(
+    const PluginTensorDesc * inputDesc, const PluginTensorDesc * outputDesc,
+    const void * const * inputs, void * const * outputs, void * workspace,
+    cudaStream_t stream) noexcept override;
 
 protected:
-    // To prevent compiler warnings
-    using nvinfer1::IPluginV2::enqueue;
-    using nvinfer1::IPluginV2::getOutputDimensions;
-    using nvinfer1::IPluginV2::getWorkspaceSize;
-    using nvinfer1::IPluginV2Ext::configurePlugin;
+  // To prevent compiler warnings
+  using nvinfer1::IPluginV2::enqueue;
+  using nvinfer1::IPluginV2::getOutputDimensions;
+  using nvinfer1::IPluginV2::getWorkspaceSize;
+  using nvinfer1::IPluginV2Ext::configurePlugin;
 };
 
 class AlignBEVPluginCreator : public IPluginCreator
 {
 private:
-    static PluginFieldCollection    fc_;
-    static std::vector<PluginField> attr_;
-    std::string                     namespace_;
+  static PluginFieldCollection fc_;
+  static std::vector<PluginField> attr_;
+  std::string namespace_;
 
 public:
-    AlignBEVPluginCreator();
-    ~AlignBEVPluginCreator();
-    const char *                 getPluginName() const noexcept override;
-    const char *                 getPluginVersion() const noexcept override;
-    const PluginFieldCollection *getFieldNames() noexcept override;
-    IPluginV2DynamicExt *        createPlugin(const char *name, const PluginFieldCollection *fc) noexcept override;
-    IPluginV2DynamicExt *        deserializePlugin(const char *name, const void *serialData, size_t serialLength) noexcept override;
-    void                         setPluginNamespace(const char *pluginNamespace) noexcept override;
-    const char *                 getPluginNamespace() const noexcept override;
+  AlignBEVPluginCreator();
+  ~AlignBEVPluginCreator();
+  const char * getPluginName() const noexcept override;
+  const char * getPluginVersion() const noexcept override;
+  const PluginFieldCollection * getFieldNames() noexcept override;
+  IPluginV2DynamicExt * createPlugin(
+    const char * name, const PluginFieldCollection * fc) noexcept override;
+  IPluginV2DynamicExt * deserializePlugin(
+    const char * name, const void * serialData, size_t serialLength) noexcept override;
+  void setPluginNamespace(const char * pluginNamespace) noexcept override;
+  const char * getPluginNamespace() const noexcept override;
 };
 
-} // namespace nvinfer1
+}  // namespace nvinfer1
diff --git a/perception/tensorrt_bevdet/include/bevdet.hpp b/perception/tensorrt_bevdet/include/bevdet.hpp
index 1ad04b1c4704f..ec873b2f4286f 100644
--- a/perception/tensorrt_bevdet/include/bevdet.hpp
+++ b/perception/tensorrt_bevdet/include/bevdet.hpp
@@ -1,239 +1,224 @@
-#ifndef __BEVDET_HPP__
-#define __BEVDET_HPP__
+#ifndef BEVDET_HPP_
+#define BEVDET_HPP_
 
-#include <string>
-#include <vector>
-#include <map>
-#include <memory>
-#include <yaml-cpp/yaml.h>
+#include "NvInfer.h"
+#include "common.hpp"
+#include "data.hpp"
+#include "postprocess.hpp"
 
 #include <Eigen/Core>
 #include <Eigen/Geometry>
 
-
-#include "common.hpp"
-#include "postprocess.hpp"
-#include "data.hpp"
-
-#include "NvInfer.h"
 #include <NvOnnxParser.h>
+#include <yaml-cpp/yaml.h>
 
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
 
-class adjFrame{
+class adjFrame
+{
 public:
-    adjFrame(){}
-    adjFrame(int _n,
-             size_t _buf_size) : 
-             n(_n), 
-             buf_size(_buf_size),
-             scenes_token(_n),
-             ego2global_rot(_n),
-             ego2global_trans(_n) {
-        CHECK_CUDA(cudaMalloc((void**)&adj_buffer, _n * _buf_size));
-        CHECK_CUDA(cudaMemset(adj_buffer, 0, _n * _buf_size));
-        last = 0;
-        buffer_num = 0;
-        init = false;
-
-        for(auto &rot : ego2global_rot){
-            rot = Eigen::Quaternion<float>(0.f, 0.f, 0.f, 0.f);
-        }
-        for(auto &trans : ego2global_trans){
-            trans = Eigen::Translation3f(0.f, 0.f, 0.f);
-        }
-
-    }
-    const std::string& lastScenesToken() const{
-        return scenes_token[last];
-    }
-
-    void reset(){
-        last = 0; // origin -1
-        buffer_num = 0;
-        init = false;
-    }
-
-    void saveFrameBuffer(const void* curr_buffer, 
-                        const std::string &curr_token, 
-                        const Eigen::Quaternion<float> &_ego2global_rot,
-                        const Eigen::Translation3f &_ego2global_trans){
-        int iters = init ? 1 : n;
-        while(iters--){
-          last = (last + 1) % n;
-          CHECK_CUDA(cudaMemcpy((char*)adj_buffer + last * buf_size, curr_buffer,
-                          buf_size, cudaMemcpyDeviceToDevice));
-          scenes_token[last] = curr_token;
-          ego2global_rot[last] = _ego2global_rot;
-          ego2global_trans[last] = _ego2global_trans;
-          buffer_num = std::min(buffer_num + 1, n);
-        }
-        init = true;
-    }
-    int havingBuffer(int idx){
-        return static_cast<int>(idx < buffer_num);
+  adjFrame() {}
+  adjFrame(int _n, size_t _buf_size)
+  : n(_n), buf_size(_buf_size), scenes_token(_n), ego2global_rot(_n), ego2global_trans(_n)
+  {
+    CHECK_CUDA(cudaMalloc((void **)&adj_buffer, _n * _buf_size));
+    CHECK_CUDA(cudaMemset(adj_buffer, 0, _n * _buf_size));
+    last = 0;
+    buffer_num = 0;
+    init = false;
+
+    for (auto & rot : ego2global_rot) {
+      rot = Eigen::Quaternion<float>(0.f, 0.f, 0.f, 0.f);
     }
-
-    const void* getFrameBuffer(int idx){
-        idx = (-idx + last + n) % n;
-        return (char*)adj_buffer + idx * buf_size;
-    }
-    void getEgo2Global(int idx, 
-                    Eigen::Quaternion<float> &adj_ego2global_rot, 
-                    Eigen::Translation3f &adj_ego2global_trans){
-        idx = (-idx + last + n) % n;
-        adj_ego2global_rot = ego2global_rot[idx];
-        adj_ego2global_trans = ego2global_trans[idx];
+    for (auto & trans : ego2global_trans) {
+      trans = Eigen::Translation3f(0.f, 0.f, 0.f);
     }
-
-    ~adjFrame(){
-        CHECK_CUDA(cudaFree(adj_buffer));
+  }
+  const std::string & lastScenesToken() const { return scenes_token[last]; }
+
+  void reset()
+  {
+    last = 0;  // origin -1
+    buffer_num = 0;
+    init = false;
+  }
+
+  void saveFrameBuffer(
+    const void * curr_buffer, const std::string & curr_token,
+    const Eigen::Quaternion<float> & _ego2global_rot,
+    const Eigen::Translation3f & _ego2global_trans)
+  {
+    int iters = init ? 1 : n;
+    while (iters--) {
+      last = (last + 1) % n;
+      CHECK_CUDA(cudaMemcpy(
+        (char *)adj_buffer + last * buf_size, curr_buffer, buf_size, cudaMemcpyDeviceToDevice));
+      scenes_token[last] = curr_token;
+      ego2global_rot[last] = _ego2global_rot;
+      ego2global_trans[last] = _ego2global_trans;
+      buffer_num = std::min(buffer_num + 1, n);
     }
+    init = true;
+  }
+  int havingBuffer(int idx) { return static_cast<int>(idx < buffer_num); }
+
+  const void * getFrameBuffer(int idx)
+  {
+    idx = (-idx + last + n) % n;
+    return (char *)adj_buffer + idx * buf_size;
+  }
+  void getEgo2Global(
+    int idx, Eigen::Quaternion<float> & adj_ego2global_rot,
+    Eigen::Translation3f & adj_ego2global_trans)
+  {
+    idx = (-idx + last + n) % n;
+    adj_ego2global_rot = ego2global_rot[idx];
+    adj_ego2global_trans = ego2global_trans[idx];
+  }
+
+  ~adjFrame() { CHECK_CUDA(cudaFree(adj_buffer)); }
 
 private:
-    int n;
-    size_t buf_size;
+  int n;
+  size_t buf_size;
 
-    int last;
-    int buffer_num;
-    bool init;
+  int last;
+  int buffer_num;
+  bool init;
 
-    std::vector<std::string> scenes_token;
-    std::vector<Eigen::Quaternion<float>> ego2global_rot;
-    std::vector<Eigen::Translation3f> ego2global_trans;
+  std::vector<std::string> scenes_token;
+  std::vector<Eigen::Quaternion<float>> ego2global_rot;
+  std::vector<Eigen::Translation3f> ego2global_trans;
 
-    void* adj_buffer;
+  void * adj_buffer;
 };
 
-class BEVDet{
+class BEVDet
+{
 public:
-    BEVDet(){}
-    BEVDet(const std::string &config_file, int n_img,      
-                                    std::vector<Eigen::Matrix3f> _cams_intrin, 
-                                    std::vector<Eigen::Quaternion<float>> _cams2ego_rot, 
-                                    std::vector<Eigen::Translation3f> _cams2ego_trans,
-                                    const std::string &onnx_file,
-                                    const std::string &engine_file);
-  
-    int DoInfer(const camsData &cam_data,  std::vector<Box> &out_detections, 
-                                                            float &cost_time, int idx=-1);
-    ~BEVDet();
+  BEVDet() {}
+  BEVDet(
+    const std::string & config_file, int n_img, std::vector<Eigen::Matrix3f> _cams_intrin,
+    std::vector<Eigen::Quaternion<float>> _cams2ego_rot,
+    std::vector<Eigen::Translation3f> _cams2ego_trans, const std::string & onnx_file,
+    const std::string & engine_file);
 
-protected:
-    void InitParams(const std::string &config_file);
+  int DoInfer(
+    const camsData & cam_data, std::vector<Box> & out_detections, float & cost_time, int idx = -1);
+  ~BEVDet();
 
-    void InitViewTransformer(std::shared_ptr<int> &ranks_bev_ptr, 
-                             std::shared_ptr<int> &ranks_depth_ptr, 
-                             std::shared_ptr<int> &ranks_feat_ptr, 
-                             std::shared_ptr<int> &interval_starts_ptr, 
-                             std::shared_ptr<int> &interval_lengths_ptr);
-    void ExportEngine(const std::string &onnxFile, const std::string &trtFile);
-    int InitEngine(const std::string &engine_file);
-
-    int DeserializeTRTEngine(const std::string &engine_file, 
-                             nvinfer1::ICudaEngine **engine_ptr);
+protected:
+  void InitParams(const std::string & config_file);
 
-    void MallocDeviceMemory();
+  void InitViewTransformer(
+    std::shared_ptr<int> & ranks_bev_ptr, std::shared_ptr<int> & ranks_depth_ptr,
+    std::shared_ptr<int> & ranks_feat_ptr, std::shared_ptr<int> & interval_starts_ptr,
+    std::shared_ptr<int> & interval_lengths_ptr);
+  void ExportEngine(const std::string & onnxFile, const std::string & trtFile);
+  int InitEngine(const std::string & engine_file);
 
-    void InitCamParams(const std::vector<Eigen::Quaternion<float>> &curr_cams2ego_rot,
-                       const std::vector<Eigen::Translation3f> &curr_cams2ego_trans,
-                       const std::vector<Eigen::Matrix3f> &cams_intrin);
+  int DeserializeTRTEngine(const std::string & engine_file, nvinfer1::ICudaEngine ** engine_ptr);
 
-    void GetAdjBEVFeature(const std::string &curr_scene_token, 
-                        const Eigen::Quaternion<float> &ego2global_rot,
-                        const Eigen::Translation3f &ego2global_trans);
+  void MallocDeviceMemory();
 
-    void GetCurr2AdjTransform(const Eigen::Quaternion<float> &curr_ego2global_rot,
-                            const Eigen::Quaternion<float> &adj_ego2global_rot,
-                            const Eigen::Translation3f &curr_ego2global_trans,
-                            const Eigen::Translation3f &adj_ego2global_trans,
-                            float* transform_dev);
+  void InitCamParams(
+    const std::vector<Eigen::Quaternion<float>> & curr_cams2ego_rot,
+    const std::vector<Eigen::Translation3f> & curr_cams2ego_trans,
+    const std::vector<Eigen::Matrix3f> & cams_intrin);
 
+  void GetAdjBEVFeature(
+    const std::string & curr_scene_token, const Eigen::Quaternion<float> & ego2global_rot,
+    const Eigen::Translation3f & ego2global_trans);
 
+  void GetCurr2AdjTransform(
+    const Eigen::Quaternion<float> & curr_ego2global_rot,
+    const Eigen::Quaternion<float> & adj_ego2global_rot,
+    const Eigen::Translation3f & curr_ego2global_trans,
+    const Eigen::Translation3f & adj_ego2global_trans, float * transform_dev);
 
 private:
-
-    int N_img;
-
-    int src_img_h;
-    int src_img_w;
-    int input_img_h;
-    int input_img_w;
-    int crop_h;
-    int crop_w;
-    float resize_radio;
-    int down_sample;
-    int feat_h;
-    int feat_w;
-    int bev_h;
-    int bev_w;
-    int bevpool_channel;
-
-    float depth_start;
-    float depth_end;
-    float depth_step;
-    int depth_num;
-
-    float x_start;
-    float x_end;
-    float x_step;
-    int xgrid_num;
-
-    float y_start;
-    float y_end;
-    float y_step;
-    int ygrid_num;
-
-    float z_start;
-    float z_end;
-    float z_step;
-    int zgrid_num;
-
-    std::vector<float> mean;
-    std::vector<float> std;
-
-    bool use_depth;
-    bool use_adj;
-    int adj_num;
-
-
-    int class_num;
-    float score_thresh;
-    float nms_overlap_thresh;
-    int nms_pre_maxnum;
-    int nms_post_maxnum;
-    std::vector<float> nms_rescale_factor;
-    std::vector<int> class_num_pre_task;
-    std::map<std::string, int> out_num_task_head;
-
-    std::vector<Eigen::Matrix3f> cams_intrin;
-    std::vector<Eigen::Quaternion<float>> cams2ego_rot;
-    std::vector<Eigen::Translation3f> cams2ego_trans;
-
-    Eigen::Matrix3f post_rot;
-    Eigen::Translation3f post_trans;
-
-    std::vector<size_t> trt_buffer_sizes;
-    void** trt_buffer_dev;
-    float* cam_params_host;
-    void** post_buffer;
-
-    std::map<std::string, int> buffer_map;
-
-    int valid_feat_num;
-    int unique_bev_num;
-
-    int transform_size;
-    int cam_params_size;
-
-    Logger g_logger;
-
-    nvinfer1::ICudaEngine* trt_engine;
-    nvinfer1::IExecutionContext* trt_context;
-
-    std::unique_ptr<PostprocessGPU> postprocess_ptr;
-    std::unique_ptr<adjFrame> adj_frame_ptr;
-
+  int N_img;
+
+  int src_img_h;
+  int src_img_w;
+  int input_img_h;
+  int input_img_w;
+  int crop_h;
+  int crop_w;
+  float resize_radio;
+  int down_sample;
+  int feat_h;
+  int feat_w;
+  int bev_h;
+  int bev_w;
+  int bevpool_channel;
+
+  float depth_start;
+  float depth_end;
+  float depth_step;
+  int depth_num;
+
+  float x_start;
+  float x_end;
+  float x_step;
+  int xgrid_num;
+
+  float y_start;
+  float y_end;
+  float y_step;
+  int ygrid_num;
+
+  float z_start;
+  float z_end;
+  float z_step;
+  int zgrid_num;
+
+  std::vector<float> mean;
+  std::vector<float> std;
+
+  bool use_depth;
+  bool use_adj;
+  int adj_num;
+
+  int class_num;
+  float score_thresh;
+  float nms_overlap_thresh;
+  int nms_pre_maxnum;
+  int nms_post_maxnum;
+  std::vector<float> nms_rescale_factor;
+  std::vector<int> class_num_pre_task;
+  std::map<std::string, int> out_num_task_head;
+
+  std::vector<Eigen::Matrix3f> cams_intrin;
+  std::vector<Eigen::Quaternion<float>> cams2ego_rot;
+  std::vector<Eigen::Translation3f> cams2ego_trans;
+
+  Eigen::Matrix3f post_rot;
+  Eigen::Translation3f post_trans;
+
+  std::vector<size_t> trt_buffer_sizes;
+  void ** trt_buffer_dev;
+  float * cam_params_host;
+  void ** post_buffer;
+
+  std::map<std::string, int> buffer_map;
+
+  int valid_feat_num;
+  int unique_bev_num;
+
+  int transform_size;
+  int cam_params_size;
+
+  Logger g_logger;
+
+  nvinfer1::ICudaEngine * trt_engine;
+  nvinfer1::IExecutionContext * trt_context;
+
+  std::unique_ptr<PostprocessGPU> postprocess_ptr;
+  std::unique_ptr<adjFrame> adj_frame_ptr;
 };
 
-
-#endif
\ No newline at end of file
+#endif  // BEVDET_HPP_
diff --git a/perception/tensorrt_bevdet/include/bevdet_node.hpp b/perception/tensorrt_bevdet/include/bevdet_node.hpp
index 832ac87d382b5..e83b55d0131f2 100644
--- a/perception/tensorrt_bevdet/include/bevdet_node.hpp
+++ b/perception/tensorrt_bevdet/include/bevdet_node.hpp
@@ -1,43 +1,37 @@
-#include <cstdio>
-#include <string>
-#include <iostream>
-#include <fstream>
-#include <thread>
-#include <chrono>
-#include <cassert>
-#include <map>
-
-#include <yaml-cpp/yaml.h>
 #include "bevdet.hpp"
 #include "cpu_jpegdecoder.hpp"
 
-#include <pcl/io/pcd_io.h>
-#include <pcl/point_types.h>
-#include <pcl/common/transforms.h>
-
-
-
 #include <opencv2/opencv.hpp>
-#include <cv_bridge/cv_bridge.h>
-#include <sensor_msgs/msg/image.hpp>
-
-#include <sensor_msgs/msg/point_cloud2.hpp>
-#include <pcl_conversions/pcl_conversions.h>  // msg2pcl
-
-#include <geometry_msgs/msg/point32.hpp>
-#include <geometry_msgs/msg/vector3.hpp>
-#include <geometry_msgs/msg/quaternion.hpp>
-#include <tf2_geometry_msgs/tf2_geometry_msgs.hpp>
 
 #include <autoware_perception_msgs/msg/detected_object_kinematics.hpp>
 #include <autoware_perception_msgs/msg/detected_objects.hpp>
 #include <autoware_perception_msgs/msg/object_classification.hpp>
 #include <autoware_perception_msgs/msg/shape.hpp>
+#include <geometry_msgs/msg/point32.hpp>
+#include <geometry_msgs/msg/quaternion.hpp>
+#include <geometry_msgs/msg/vector3.hpp>
+#include <sensor_msgs/msg/image.hpp>
+#include <sensor_msgs/msg/point_cloud2.hpp>
+#include <tf2_geometry_msgs/tf2_geometry_msgs.hpp>
 
+#include <cv_bridge/cv_bridge.h>
 #include <message_filters/subscriber.h>
-#include <message_filters/synchronizer.h>
 #include <message_filters/sync_policies/approximate_time.h>
+#include <message_filters/synchronizer.h>
+#include <pcl/common/transforms.h>
+#include <pcl/io/pcd_io.h>
+#include <pcl/point_types.h>
+#include <pcl_conversions/pcl_conversions.h>  // msg2pcl
+#include <yaml-cpp/yaml.h>
 
+#include <cassert>
+#include <chrono>
+#include <cstdio>
+#include <fstream>
+#include <iostream>
+#include <map>
+#include <string>
+#include <thread>
 
 using std::chrono::duration;
 using std::chrono::high_resolution_clock;
@@ -47,94 +41,90 @@ typedef pcl::PointXYZI PointT;
 uint8_t getSemanticType(const std::string & class_name);
 void Getinfo(void);
 
-void box3DToDetectedObjects(const std::vector<Box>& boxes, 
-                    autoware_perception_msgs::msg::DetectedObjects & objects,
-                    const std::vector<std::string> & class_names,  
-                    float score_thre,
-                    const bool has_twist);
-
+void box3DToDetectedObjects(
+  const std::vector<Box> & boxes, autoware_perception_msgs::msg::DetectedObjects & objects,
+  const std::vector<std::string> & class_names, float score_thre, const bool has_twist);
 
 // opencv Mat-> std::vector<char>
-int cvToArr(cv::Mat img, std::vector<char> &raw_data)
+int cvToArr(cv::Mat img, std::vector<char> & raw_data)
 {
-    if (img.empty())
-    {
-        std::cerr << "image is empty. " << std::endl;
-        return EXIT_FAILURE;
-    }
-    
-    std::vector<u_char> raw_data_;
-    cv::imencode(".jpg", img, raw_data_);
-    raw_data = std::vector<char>(raw_data_.begin(), raw_data_.end());
-    return EXIT_SUCCESS;
+  if (img.empty()) {
+    std::cerr << "image is empty. " << std::endl;
+    return EXIT_FAILURE;
+  }
+
+  std::vector<u_char> raw_data_;
+  cv::imencode(".jpg", img, raw_data_);
+  raw_data = std::vector<char>(raw_data_.begin(), raw_data_.end());
+  return EXIT_SUCCESS;
 }
 
-int cvImgToArr(std::vector<cv::Mat> &imgs, std::vector<std::vector<char>> &imgs_data)
+int cvImgToArr(std::vector<cv::Mat> & imgs, std::vector<std::vector<char>> & imgs_data)
 {
-    imgs_data.resize(imgs.size());
-
-    for(size_t i = 0; i < imgs_data.size(); i++)
-    {   
-        if(cvToArr(imgs[i], imgs_data[i]))
-        {
-            return EXIT_FAILURE;
-        }
+  imgs_data.resize(imgs.size());
+
+  for (size_t i = 0; i < imgs_data.size(); i++) {
+    if (cvToArr(imgs[i], imgs_data[i])) {
+      return EXIT_FAILURE;
     }
-    return EXIT_SUCCESS;
+  }
+  return EXIT_SUCCESS;
 }
- 
+
 class TRTBEVDetNode : public rclcpp::Node
 {
 private:
+  size_t img_N_;
+  int img_w_;
+  int img_h_;
+
+  std::string model_config_;
+
+  std::string onnx_file_;
+  std::string engine_file_;
+
+  YAML::Node camconfig_;
+
+  std::vector<std::string> imgs_name_;
+  std::vector<std::string> class_names_;
+
+  camsData sampleData_;
+  std::shared_ptr<BEVDet> bevdet_;
+
+  uchar * imgs_dev_ = nullptr;
+  float score_thre_;
+
+  rclcpp::Publisher<sensor_msgs::msg::PointCloud2>::SharedPtr pub_cloud_;
+  rclcpp::Publisher<autoware_perception_msgs::msg::DetectedObjects>::SharedPtr
+    pub_boxes_;  // ros2无该消息类型
+
+  message_filters::Subscriber<sensor_msgs::msg::PointCloud2> sub_cloud_;
+  message_filters::Subscriber<sensor_msgs::msg::Image> sub_f_img_;
+  message_filters::Subscriber<sensor_msgs::msg::Image> sub_fl_img_;
+  message_filters::Subscriber<sensor_msgs::msg::Image> sub_fr_img_;
+  message_filters::Subscriber<sensor_msgs::msg::Image> sub_b_img_;
+  message_filters::Subscriber<sensor_msgs::msg::Image> sub_bl_img_;
+  message_filters::Subscriber<sensor_msgs::msg::Image> sub_br_img_;
+
+  typedef message_filters::sync_policies::ApproximateTime<
+    sensor_msgs::msg::PointCloud2, sensor_msgs::msg::Image, sensor_msgs::msg::Image,
+    sensor_msgs::msg::Image, sensor_msgs::msg::Image, sensor_msgs::msg::Image,
+    sensor_msgs::msg::Image>
+    MySyncPolicy;
 
-    size_t img_N_;
-    int img_w_; 
-    int img_h_;
-    
-    std::string model_config_;
-    
-    std::string onnx_file_;
-    std::string engine_file_;
-   
-    YAML::Node camconfig_; 
-    
-    std::vector<std::string> imgs_name_;
-    std::vector<std::string> class_names_;
-
-    camsData sampleData_;
-    std::shared_ptr<BEVDet> bevdet_;
-
-    uchar* imgs_dev_ = nullptr; 
-    float score_thre_;
-
-    rclcpp::Publisher<sensor_msgs::msg::PointCloud2>::SharedPtr pub_cloud_;
-    rclcpp::Publisher<autoware_perception_msgs::msg::DetectedObjects>::SharedPtr pub_boxes_;//ros2无该消息类型
-    
-    message_filters::Subscriber<sensor_msgs::msg::PointCloud2> sub_cloud_; 
-    message_filters::Subscriber<sensor_msgs::msg::Image> sub_f_img_; 
-    message_filters::Subscriber<sensor_msgs::msg::Image> sub_fl_img_; 
-    message_filters::Subscriber<sensor_msgs::msg::Image> sub_fr_img_; 
-    message_filters::Subscriber<sensor_msgs::msg::Image> sub_b_img_; 
-    message_filters::Subscriber<sensor_msgs::msg::Image> sub_bl_img_; 
-    message_filters::Subscriber<sensor_msgs::msg::Image> sub_br_img_; 
-
-    typedef message_filters::sync_policies::ApproximateTime<
-        sensor_msgs::msg::PointCloud2, 
-        sensor_msgs::msg::Image, sensor_msgs::msg::Image, sensor_msgs::msg::Image,
-        sensor_msgs::msg::Image, sensor_msgs::msg::Image, sensor_msgs::msg::Image> MySyncPolicy;
-    
-    typedef message_filters::Synchronizer<MySyncPolicy> Sync;
-    std::shared_ptr<Sync> sync_;
+  typedef message_filters::Synchronizer<MySyncPolicy> Sync;
+  std::shared_ptr<Sync> sync_;
 
 public:
-    TRTBEVDetNode(const std::string & node_name, const rclcpp::NodeOptions & options);
-    ~TRTBEVDetNode();
+  TRTBEVDetNode(const std::string & node_name, const rclcpp::NodeOptions & options);
+  ~TRTBEVDetNode();
 
-    void callback(const sensor_msgs::msg::PointCloud2::ConstSharedPtr & msg_cloud, 
+  void callback(
+    const sensor_msgs::msg::PointCloud2::ConstSharedPtr & msg_cloud,
     const sensor_msgs::msg::Image::ConstSharedPtr & msg_fl_img,
     const sensor_msgs::msg::Image::ConstSharedPtr & msg_f_img,
     const sensor_msgs::msg::Image::ConstSharedPtr & msg_fr_img,
     const sensor_msgs::msg::Image::ConstSharedPtr & msg_bl_img,
     const sensor_msgs::msg::Image::ConstSharedPtr & msg_b_img,
     const sensor_msgs::msg::Image::ConstSharedPtr & msg_br_img);
-};
\ No newline at end of file
+};
diff --git a/perception/tensorrt_bevdet/include/bevpool_plugin.hpp b/perception/tensorrt_bevdet/include/bevpool_plugin.hpp
index cab0b58d60c98..2d41fdae42389 100644
--- a/perception/tensorrt_bevdet/include/bevpool_plugin.hpp
+++ b/perception/tensorrt_bevdet/include/bevpool_plugin.hpp
@@ -15,89 +15,106 @@
  */
 
 #include <NvInfer.h>
+#include <unistd.h>
+
 #include <cmath>
+#include <iostream>
 #include <map>
 #include <string>
-#include <unistd.h>
 #include <vector>
-#include <iostream>
 
 namespace
 {
-static const char *BEVPOOL_PLUGIN_NAME {"BEVPool"};
-static const char *BEVPOOL_PLUGIN_VERSION {"1"};
-} // namespace
+static const char * BEVPOOL_PLUGIN_NAME{"BEVPool"};
+static const char * BEVPOOL_PLUGIN_VERSION{"1"};
+}  // namespace
 
 namespace nvinfer1
 {
 class BEVPoolPlugin : public IPluginV2DynamicExt
 {
 private:
-    const std::string name_;
-    std::string       namespace_;
-    struct
-    {
-        int bev_h;
-        int bev_w;
-        int n;
-    } m_;
+  const std::string name_;
+  std::string namespace_;
+  struct
+  {
+    int bev_h;
+    int bev_w;
+    int n;
+  } m_;
 
 public:
-    BEVPoolPlugin() = delete;
-    BEVPoolPlugin(const std::string &name, int bev_h, int bev_w, int n);
-    BEVPoolPlugin(const std::string &name, const void *buffer, size_t length);
-    ~BEVPoolPlugin();
+  BEVPoolPlugin() = delete;
+  BEVPoolPlugin(const std::string & name, int bev_h, int bev_w, int n);
+  BEVPoolPlugin(const std::string & name, const void * buffer, size_t length);
+  ~BEVPoolPlugin();
 
-    // Method inherited from IPluginV2
-    const char *getPluginType() const noexcept override;
-    const char *getPluginVersion() const noexcept override;
-    int32_t     getNbOutputs() const noexcept override;
-    int32_t     initialize() noexcept override;
-    void        terminate() noexcept override;
-    size_t      getSerializationSize() const noexcept override;
-    void        serialize(void *buffer) const noexcept override;
-    void        destroy() noexcept override;
-    void        setPluginNamespace(const char *pluginNamespace) noexcept override;
-    const char *getPluginNamespace() const noexcept override;
+  // Method inherited from IPluginV2
+  const char * getPluginType() const noexcept override;
+  const char * getPluginVersion() const noexcept override;
+  int32_t getNbOutputs() const noexcept override;
+  int32_t initialize() noexcept override;
+  void terminate() noexcept override;
+  size_t getSerializationSize() const noexcept override;
+  void serialize(void * buffer) const noexcept override;
+  void destroy() noexcept override;
+  void setPluginNamespace(const char * pluginNamespace) noexcept override;
+  const char * getPluginNamespace() const noexcept override;
 
-    // Method inherited from IPluginV2Ext
-    DataType getOutputDataType(int32_t index, DataType const *inputTypes, int32_t nbInputs) const noexcept override;
-    void     attachToContext(cudnnContext *contextCudnn, cublasContext *contextCublas, IGpuAllocator *gpuAllocator) noexcept override;
-    void     detachFromContext() noexcept override;
+  // Method inherited from IPluginV2Ext
+  DataType getOutputDataType(
+    int32_t index, DataType const * inputTypes, int32_t nbInputs) const noexcept override;
+  void attachToContext(
+    cudnnContext * contextCudnn, cublasContext * contextCublas,
+    IGpuAllocator * gpuAllocator) noexcept override;
+  void detachFromContext() noexcept override;
 
-    // Method inherited from IPluginV2DynamicExt
-    IPluginV2DynamicExt *clone() const noexcept override;
-    DimsExprs            getOutputDimensions(int32_t outputIndex, const DimsExprs *inputs, int32_t nbInputs, IExprBuilder &exprBuilder) noexcept override;
-    bool                 supportsFormatCombination(int32_t pos, const PluginTensorDesc *inOut, int32_t nbInputs, int32_t nbOutputs) noexcept override;
-    void                 configurePlugin(const DynamicPluginTensorDesc *in, int32_t nbInputs, const DynamicPluginTensorDesc *out, int32_t nbOutputs) noexcept override;
-    size_t               getWorkspaceSize(const PluginTensorDesc *inputs, int32_t nbInputs, const PluginTensorDesc *outputs, int32_t nbOutputs) const noexcept override;
-    int32_t              enqueue(const PluginTensorDesc *inputDesc, const PluginTensorDesc *outputDesc, const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept override;
+  // Method inherited from IPluginV2DynamicExt
+  IPluginV2DynamicExt * clone() const noexcept override;
+  DimsExprs getOutputDimensions(
+    int32_t outputIndex, const DimsExprs * inputs, int32_t nbInputs,
+    IExprBuilder & exprBuilder) noexcept override;
+  bool supportsFormatCombination(
+    int32_t pos, const PluginTensorDesc * inOut, int32_t nbInputs,
+    int32_t nbOutputs) noexcept override;
+  void configurePlugin(
+    const DynamicPluginTensorDesc * in, int32_t nbInputs, const DynamicPluginTensorDesc * out,
+    int32_t nbOutputs) noexcept override;
+  size_t getWorkspaceSize(
+    const PluginTensorDesc * inputs, int32_t nbInputs, const PluginTensorDesc * outputs,
+    int32_t nbOutputs) const noexcept override;
+  int32_t enqueue(
+    const PluginTensorDesc * inputDesc, const PluginTensorDesc * outputDesc,
+    const void * const * inputs, void * const * outputs, void * workspace,
+    cudaStream_t stream) noexcept override;
 
 protected:
-    // To prevent compiler warnings
-    using nvinfer1::IPluginV2::enqueue;
-    using nvinfer1::IPluginV2::getOutputDimensions;
-    using nvinfer1::IPluginV2::getWorkspaceSize;
-    using nvinfer1::IPluginV2Ext::configurePlugin;
+  // To prevent compiler warnings
+  using nvinfer1::IPluginV2::enqueue;
+  using nvinfer1::IPluginV2::getOutputDimensions;
+  using nvinfer1::IPluginV2::getWorkspaceSize;
+  using nvinfer1::IPluginV2Ext::configurePlugin;
 };
 
 class BEVPoolPluginCreator : public IPluginCreator
 {
 private:
-    static PluginFieldCollection    fc_;
-    static std::vector<PluginField> attr_;
-    std::string                     namespace_;
+  static PluginFieldCollection fc_;
+  static std::vector<PluginField> attr_;
+  std::string namespace_;
 
 public:
-    BEVPoolPluginCreator();
-    ~BEVPoolPluginCreator();
-    const char *                 getPluginName() const noexcept override;
-    const char *                 getPluginVersion() const noexcept override;
-    const PluginFieldCollection *getFieldNames() noexcept override;
-    IPluginV2DynamicExt *        createPlugin(const char *name, const PluginFieldCollection *fc) noexcept override;
-    IPluginV2DynamicExt *        deserializePlugin(const char *name, const void *serialData, size_t serialLength) noexcept override;
-    void                         setPluginNamespace(const char *pluginNamespace) noexcept override;
-    const char *                 getPluginNamespace() const noexcept override;
+  BEVPoolPluginCreator();
+  ~BEVPoolPluginCreator();
+  const char * getPluginName() const noexcept override;
+  const char * getPluginVersion() const noexcept override;
+  const PluginFieldCollection * getFieldNames() noexcept override;
+  IPluginV2DynamicExt * createPlugin(
+    const char * name, const PluginFieldCollection * fc) noexcept override;
+  IPluginV2DynamicExt * deserializePlugin(
+    const char * name, const void * serialData, size_t serialLength) noexcept override;
+  void setPluginNamespace(const char * pluginNamespace) noexcept override;
+  const char * getPluginNamespace() const noexcept override;
 };
 
-} // namespace nvinfer1
+}  // namespace nvinfer1
diff --git a/perception/tensorrt_bevdet/include/common.hpp b/perception/tensorrt_bevdet/include/common.hpp
index e49a75804eb9a..5ee37cf3920f6 100644
--- a/perception/tensorrt_bevdet/include/common.hpp
+++ b/perception/tensorrt_bevdet/include/common.hpp
@@ -1,25 +1,28 @@
-#ifndef __COMMON_HPP__
-#define __COMMON_HPP__
+#ifndef COMMON_HPP_
+#define COMMON_HPP_
 
+#include <NvInfer.h>
 #include <assert.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string>
-#include <iostream>
-
 #include <cuda_runtime.h>
-#include <NvInfer.h>
+#include <stdio.h>
+#include <stdlib.h>
 
+#include <iostream>
+#include <string>
 
 typedef unsigned char uchar;
 
-#define NUM_THREADS 512 
+#define NUM_THREADS 512
 
-#define CHECK_CUDA(ans) { GPUAssert((ans), __FILE__, __LINE__); }
+#define CHECK_CUDA(ans)                   \
+  {                                       \
+    GPUAssert((ans), __FILE__, __LINE__); \
+  }
 
-#define DIVUP(m, n) (((m) + (n)-1) / (n))
+#define DIVUP(m, n) (((m) + (n) - 1) / (n))
 
-inline void GPUAssert(cudaError_t code, const char *file, int line, bool abort = true) {
+inline void GPUAssert(cudaError_t code, const char * file, int line, bool abort = true)
+{
   if (code != cudaSuccess) {
     fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
     if (abort) exit(code);
@@ -30,82 +33,76 @@ inline void GPUAssert(cudaError_t code, const char *file, int line, bool abort =
   for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); i += blockDim.x * gridDim.x)
 
 const int MAXTENSORDIMS = 6;
-struct TensorDesc {
+struct TensorDesc
+{
   int shape[MAXTENSORDIMS];
   int stride[MAXTENSORDIMS];
   int dim;
 };
 
-inline int GET_BLOCKS(const int N) {
+inline int GET_BLOCKS(const int N)
+{
   int optimal_block_num = DIVUP(N, NUM_THREADS);
   int max_block_num = 4096;
   return optimal_block_num < max_block_num ? optimal_block_num : max_block_num;
 }
 
-
 __inline__ std::string dataTypeToString(nvinfer1::DataType dataType)
 {
-    switch (dataType)
-    {
+  switch (dataType) {
     case nvinfer1::DataType::kFLOAT:
-        return std::string("FP32 ");
+      return std::string("FP32 ");
     case nvinfer1::DataType::kHALF:
-        return std::string("FP16 ");
+      return std::string("FP16 ");
     case nvinfer1::DataType::kINT8:
-        return std::string("INT8 ");
+      return std::string("INT8 ");
     case nvinfer1::DataType::kINT32:
-        return std::string("INT32");
+      return std::string("INT32");
     case nvinfer1::DataType::kBOOL:
-        return std::string("BOOL ");
+      return std::string("BOOL ");
     default:
-        return std::string("Unknown");
-    }
+      return std::string("Unknown");
+  }
 }
 
-
 __inline__ size_t dataTypeToSize(nvinfer1::DataType dataType)
 {
-    switch ((int)dataType)
-    {
+  switch ((int)dataType) {
     case int(nvinfer1::DataType::kFLOAT):
-        return 4;
+      return 4;
     case int(nvinfer1::DataType::kHALF):
-        return 2;
+      return 2;
     case int(nvinfer1::DataType::kINT8):
-        return 1;
+      return 1;
     case int(nvinfer1::DataType::kINT32):
-        return 4;
+      return 4;
     case int(nvinfer1::DataType::kBOOL):
-        return 1;
+      return 1;
     default:
-        return 4;
-    }
+      return 4;
+  }
 }
 
 __inline__ std::string shapeToString(nvinfer1::Dims32 dim)
 {
-    std::string output("(");
-    if (dim.nbDims == 0)
-    {
-        return output + std::string(")");
-    }
-    for (int i = 0; i < dim.nbDims - 1; ++i)
-    {
-        output += std::to_string(dim.d[i]) + std::string(", ");
-    }
-    output += std::to_string(dim.d[dim.nbDims - 1]) + std::string(")");
-    return output;
+  std::string output("(");
+  if (dim.nbDims == 0) {
+    return output + std::string(")");
+  }
+  for (int i = 0; i < dim.nbDims - 1; ++i) {
+    output += std::to_string(dim.d[i]) + std::string(", ");
+  }
+  output += std::to_string(dim.d[dim.nbDims - 1]) + std::string(")");
+  return output;
 }
 
+class Logger : public nvinfer1::ILogger
+{
+public:
+  explicit Logger(Severity severity = Severity::kWARNING) : reportable_severity(severity) {}
 
-
-
-
-class Logger : public nvinfer1::ILogger {
- public:
-  explicit Logger(Severity severity = Severity::kWARNING) : reportable_severity(severity){}
-
-  void log(Severity severity, const char *msg) noexcept override {
+  void log(Severity severity, const char * msg) noexcept override
+  {
     // suppress messages with severity enum value greater than the reportable
     if (severity > reportable_severity) return;
     switch (severity) {
@@ -131,5 +128,4 @@ class Logger : public nvinfer1::ILogger {
   Severity reportable_severity;
 };
 
-
-#endif
\ No newline at end of file
+#endif  // COMMON_HPP_
diff --git a/perception/tensorrt_bevdet/include/cpu_jpegdecoder.hpp b/perception/tensorrt_bevdet/include/cpu_jpegdecoder.hpp
index e8f5bc966e15e..8be620940717b 100644
--- a/perception/tensorrt_bevdet/include/cpu_jpegdecoder.hpp
+++ b/perception/tensorrt_bevdet/include/cpu_jpegdecoder.hpp
@@ -1,9 +1,11 @@
-#ifndef __CPU_JPEGDECODER__
-#define __CPU_JPEGDECODER__
+#ifndef CPU_JPEGDECODER_HPP_
+#define CPU_JPEGDECODER_HPP_
 
-#include <vector>
 #include "common.hpp"
 
-int decode_cpu(const std::vector<std::vector<char>> &files_data, uchar* out_imgs, size_t width, size_t height);
+#include <vector>
+
+int decode_cpu(
+  const std::vector<std::vector<char>> & files_data, uchar * out_imgs, size_t width, size_t height);
 
-#endif
\ No newline at end of file
+#endif  // CPU_JPEGDECODER_HPP_
diff --git a/perception/tensorrt_bevdet/include/data.hpp b/perception/tensorrt_bevdet/include/data.hpp
index d27716b2e7e72..dfbb75dceeadd 100644
--- a/perception/tensorrt_bevdet/include/data.hpp
+++ b/perception/tensorrt_bevdet/include/data.hpp
@@ -1,118 +1,101 @@
-#ifndef __DATA_HPP__
-#define __DATA_HPP__
+#ifndef DATA_HPP_
+#define DATA_HPP_
 
-#include <vector>
+#include "common.hpp"
+#include "nvjpegdecoder.hpp"
 
 #include <Eigen/Core>
 #include <Eigen/Geometry>
 
 #include <yaml-cpp/yaml.h>
 
-#include "common.hpp"
-#include "nvjpegdecoder.hpp"
+#include <vector>
 
-struct camParams{
-    camParams() = default;
-    camParams(const YAML::Node &config, int n, std::vector<std::string> &cams_name);
+struct camParams
+{
+  camParams() = default;
+  camParams(const YAML::Node & config, int n, std::vector<std::string> & cams_name);
 
-    int N_img;
+  int N_img;
 
-    Eigen::Quaternion<float> ego2global_rot;
-    Eigen::Translation3f ego2global_trans;
+  Eigen::Quaternion<float> ego2global_rot;
+  Eigen::Translation3f ego2global_trans;
 
-    Eigen::Quaternion<float> lidar2ego_rot;
-    Eigen::Translation3f lidar2ego_trans;
-    // 
-    std::vector<Eigen::Matrix3f> cams_intrin;
-    std::vector<Eigen::Quaternion<float>> cams2ego_rot;
-    std::vector<Eigen::Translation3f> cams2ego_trans;
-    //
-    std::vector<std::string> imgs_file;
+  Eigen::Quaternion<float> lidar2ego_rot;
+  Eigen::Translation3f lidar2ego_trans;
+  //
+  std::vector<Eigen::Matrix3f> cams_intrin;
+  std::vector<Eigen::Quaternion<float>> cams2ego_rot;
+  std::vector<Eigen::Translation3f> cams2ego_trans;
+  //
+  std::vector<std::string> imgs_file;
 
-    unsigned long long timestamp;
-    std::string scene_token;
+  unsigned long long timestamp;
+  std::string scene_token;
 };
 
-struct camsData{
-    camsData() = default;
-    camsData(const camParams &_param) : param(_param), imgs_dev(nullptr){};
-    camParams param;
-    uchar* imgs_dev;
+struct camsData
+{
+  camsData() = default;
+  camsData(const camParams & _param) : param(_param), imgs_dev(nullptr) {};
+  camParams param;
+  uchar * imgs_dev;
 };
 
-
-class DataLoader{
+class DataLoader
+{
 public:
-    DataLoader() = default;
-    DataLoader(int _n_img, 
-               int _h, 
-               int _w,
-               const std::string &_data_infos_path,
-               const std::vector<std::string> &_cams_name,
-               bool _sep=true);
-
-    const std::vector<Eigen::Matrix3f>& get_cams_intrin() const{
-        return cams_intrin;
-    }
-    const std::vector<Eigen::Quaternion<float>>& get_cams2ego_rot() const{
-        return cams2ego_rot;
-    }
-    const std::vector<Eigen::Translation3f>& get_cams2ego_trans() const{
-        return cams2ego_trans;
-    }
-
-    const Eigen::Quaternion<float>& get_lidar2ego_rot() const{
-        return lidar2ego_rot;
-    }
-
-    const Eigen::Translation3f& get_lidar2ego_trans() const{
-        return lidar2ego_trans;
-    }
-
-    int size(){
-        return sample_num;
-    }
-
-    const camsData& data(int idx, bool time_order=true);
-    ~DataLoader();
+  DataLoader() = default;
+  DataLoader(
+    int _n_img, int _h, int _w, const std::string & _data_infos_path,
+    const std::vector<std::string> & _cams_name, bool _sep = true);
+
+  const std::vector<Eigen::Matrix3f> & get_cams_intrin() const { return cams_intrin; }
+  const std::vector<Eigen::Quaternion<float>> & get_cams2ego_rot() const { return cams2ego_rot; }
+  const std::vector<Eigen::Translation3f> & get_cams2ego_trans() const { return cams2ego_trans; }
+
+  const Eigen::Quaternion<float> & get_lidar2ego_rot() const { return lidar2ego_rot; }
+
+  const Eigen::Translation3f & get_lidar2ego_trans() const { return lidar2ego_trans; }
+
+  int size() { return sample_num; }
+
+  const camsData & data(int idx, bool time_order = true);
+  ~DataLoader();
 
 private:
-    std::vector<int> time_sequence;
-    std::string data_infos_path;
-    int sample_num;
+  std::vector<int> time_sequence;
+  std::string data_infos_path;
+  int sample_num;
 
-    std::vector<std::string> cams_name;
-    int n_img;
-    int img_h;
-    int img_w;
+  std::vector<std::string> cams_name;
+  int n_img;
+  int img_h;
+  int img_w;
 
-    std::vector<camParams> cams_param; 
-    camsData cams_data;
+  std::vector<camParams> cams_param;
+  camsData cams_data;
 
-    std::vector<Eigen::Matrix3f> cams_intrin;
-    std::vector<Eigen::Quaternion<float>> cams2ego_rot;
-    std::vector<Eigen::Translation3f> cams2ego_trans;
-    Eigen::Quaternion<float> lidar2ego_rot;
-    Eigen::Translation3f lidar2ego_trans;
+  std::vector<Eigen::Matrix3f> cams_intrin;
+  std::vector<Eigen::Quaternion<float>> cams2ego_rot;
+  std::vector<Eigen::Translation3f> cams2ego_trans;
+  Eigen::Quaternion<float> lidar2ego_rot;
+  Eigen::Translation3f lidar2ego_trans;
 
 #ifdef __HAVE_NVJPEG__
-    nvjpegDecoder nvdecoder;
+  nvjpegDecoder nvdecoder;
 #endif
-    uchar *imgs_dev;
-    std::vector<std::vector<char>> imgs_data;
-    bool separate;
-
+  uchar * imgs_dev;
+  std::vector<std::vector<char>> imgs_data;
+  bool separate;
 };
 
-
 Eigen::Translation3f fromYamlTrans(YAML::Node x);
 Eigen::Quaternion<float> fromYamlQuater(YAML::Node x);
 Eigen::Matrix3f fromYamlMatrix3f(YAML::Node x);
 
+int read_image(std::string & image_names, std::vector<char> & raw_data);
 
-int read_image(std::string &image_names, std::vector<char> &raw_data);
-
-int read_sample(std::vector<std::string> &imgs_file, 
-                std::vector<std::vector<char>> &imgs_data);
+int read_sample(std::vector<std::string> & imgs_file, std::vector<std::vector<char>> & imgs_data);
 
-#endif
\ No newline at end of file
+#endif  // DATA_HPP_
diff --git a/perception/tensorrt_bevdet/include/gatherbev_plugin.hpp b/perception/tensorrt_bevdet/include/gatherbev_plugin.hpp
index a3d22aacccac8..6da17a2ac28c2 100644
--- a/perception/tensorrt_bevdet/include/gatherbev_plugin.hpp
+++ b/perception/tensorrt_bevdet/include/gatherbev_plugin.hpp
@@ -15,85 +15,103 @@
  */
 
 #include <NvInfer.h>
+#include <unistd.h>
+
 #include <cmath>
+#include <iostream>
 #include <map>
 #include <string>
-#include <unistd.h>
 #include <vector>
-#include <iostream>
 
 namespace
 {
-static const char *GATHERBEV_PLUGIN_NAME {"GatherBEV"};
-static const char *GATHERBEV_PLUGIN_VERSION {"1"};
-} // namespace
+static const char * GATHERBEV_PLUGIN_NAME{"GatherBEV"};
+static const char * GATHERBEV_PLUGIN_VERSION{"1"};
+}  // namespace
 
 namespace nvinfer1
 {
 class GatherBEVPlugin : public IPluginV2DynamicExt
 {
 private:
-    const std::string name_;
-    std::string       namespace_;
-    struct{
-    }m_;
+  const std::string name_;
+  std::string namespace_;
+  struct
+  {
+  } m_;
 
 public:
-    GatherBEVPlugin() = delete;
-    GatherBEVPlugin(const std::string &name);
-    GatherBEVPlugin(const std::string &name, const void *buffer, size_t length);
-    ~GatherBEVPlugin();
+  GatherBEVPlugin() = delete;
+  GatherBEVPlugin(const std::string & name);
+  GatherBEVPlugin(const std::string & name, const void * buffer, size_t length);
+  ~GatherBEVPlugin();
 
-    // Method inherited from IPluginV2
-    const char *getPluginType() const noexcept override;
-    const char *getPluginVersion() const noexcept override;
-    int32_t     getNbOutputs() const noexcept override;
-    int32_t     initialize() noexcept override;
-    void        terminate() noexcept override;
-    size_t      getSerializationSize() const noexcept override;
-    void        serialize(void *buffer) const noexcept override;
-    void        destroy() noexcept override;
-    void        setPluginNamespace(const char *pluginNamespace) noexcept override;
-    const char *getPluginNamespace() const noexcept override;
+  // Method inherited from IPluginV2
+  const char * getPluginType() const noexcept override;
+  const char * getPluginVersion() const noexcept override;
+  int32_t getNbOutputs() const noexcept override;
+  int32_t initialize() noexcept override;
+  void terminate() noexcept override;
+  size_t getSerializationSize() const noexcept override;
+  void serialize(void * buffer) const noexcept override;
+  void destroy() noexcept override;
+  void setPluginNamespace(const char * pluginNamespace) noexcept override;
+  const char * getPluginNamespace() const noexcept override;
 
-    // Method inherited from IPluginV2Ext
-    DataType getOutputDataType(int32_t index, DataType const *inputTypes, int32_t nbInputs) const noexcept override;
-    void     attachToContext(cudnnContext *contextCudnn, cublasContext *contextCublas, IGpuAllocator *gpuAllocator) noexcept override;
-    void     detachFromContext() noexcept override;
+  // Method inherited from IPluginV2Ext
+  DataType getOutputDataType(
+    int32_t index, DataType const * inputTypes, int32_t nbInputs) const noexcept override;
+  void attachToContext(
+    cudnnContext * contextCudnn, cublasContext * contextCublas,
+    IGpuAllocator * gpuAllocator) noexcept override;
+  void detachFromContext() noexcept override;
 
-    // Method inherited from IPluginV2DynamicExt
-    IPluginV2DynamicExt *clone() const noexcept override;
-    DimsExprs            getOutputDimensions(int32_t outputIndex, const DimsExprs *inputs, int32_t nbInputs, IExprBuilder &exprBuilder) noexcept override;
-    bool                 supportsFormatCombination(int32_t pos, const PluginTensorDesc *inOut, int32_t nbInputs, int32_t nbOutputs) noexcept override;
-    void                 configurePlugin(const DynamicPluginTensorDesc *in, int32_t nbInputs, const DynamicPluginTensorDesc *out, int32_t nbOutputs) noexcept override;
-    size_t               getWorkspaceSize(const PluginTensorDesc *inputs, int32_t nbInputs, const PluginTensorDesc *outputs, int32_t nbOutputs) const noexcept override;
-    int32_t              enqueue(const PluginTensorDesc *inputDesc, const PluginTensorDesc *outputDesc, const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept override;
+  // Method inherited from IPluginV2DynamicExt
+  IPluginV2DynamicExt * clone() const noexcept override;
+  DimsExprs getOutputDimensions(
+    int32_t outputIndex, const DimsExprs * inputs, int32_t nbInputs,
+    IExprBuilder & exprBuilder) noexcept override;
+  bool supportsFormatCombination(
+    int32_t pos, const PluginTensorDesc * inOut, int32_t nbInputs,
+    int32_t nbOutputs) noexcept override;
+  void configurePlugin(
+    const DynamicPluginTensorDesc * in, int32_t nbInputs, const DynamicPluginTensorDesc * out,
+    int32_t nbOutputs) noexcept override;
+  size_t getWorkspaceSize(
+    const PluginTensorDesc * inputs, int32_t nbInputs, const PluginTensorDesc * outputs,
+    int32_t nbOutputs) const noexcept override;
+  int32_t enqueue(
+    const PluginTensorDesc * inputDesc, const PluginTensorDesc * outputDesc,
+    const void * const * inputs, void * const * outputs, void * workspace,
+    cudaStream_t stream) noexcept override;
 
 protected:
-    // To prevent compiler warnings
-    using nvinfer1::IPluginV2::enqueue;
-    using nvinfer1::IPluginV2::getOutputDimensions;
-    using nvinfer1::IPluginV2::getWorkspaceSize;
-    using nvinfer1::IPluginV2Ext::configurePlugin;
+  // To prevent compiler warnings
+  using nvinfer1::IPluginV2::enqueue;
+  using nvinfer1::IPluginV2::getOutputDimensions;
+  using nvinfer1::IPluginV2::getWorkspaceSize;
+  using nvinfer1::IPluginV2Ext::configurePlugin;
 };
 
 class GatherBEVPluginCreator : public IPluginCreator
 {
 private:
-    static PluginFieldCollection    fc_;
-    static std::vector<PluginField> attr_;
-    std::string                     namespace_;
+  static PluginFieldCollection fc_;
+  static std::vector<PluginField> attr_;
+  std::string namespace_;
 
 public:
-    GatherBEVPluginCreator();
-    ~GatherBEVPluginCreator();
-    const char *                 getPluginName() const noexcept override;
-    const char *                 getPluginVersion() const noexcept override;
-    const PluginFieldCollection *getFieldNames() noexcept override;
-    IPluginV2DynamicExt *        createPlugin(const char *name, const PluginFieldCollection *fc) noexcept override;
-    IPluginV2DynamicExt *        deserializePlugin(const char *name, const void *serialData, size_t serialLength) noexcept override;
-    void                         setPluginNamespace(const char *pluginNamespace) noexcept override;
-    const char *                 getPluginNamespace() const noexcept override;
+  GatherBEVPluginCreator();
+  ~GatherBEVPluginCreator();
+  const char * getPluginName() const noexcept override;
+  const char * getPluginVersion() const noexcept override;
+  const PluginFieldCollection * getFieldNames() noexcept override;
+  IPluginV2DynamicExt * createPlugin(
+    const char * name, const PluginFieldCollection * fc) noexcept override;
+  IPluginV2DynamicExt * deserializePlugin(
+    const char * name, const void * serialData, size_t serialLength) noexcept override;
+  void setPluginNamespace(const char * pluginNamespace) noexcept override;
+  const char * getPluginNamespace() const noexcept override;
 };
 
-} // namespace nvinfer1
+}  // namespace nvinfer1
diff --git a/perception/tensorrt_bevdet/include/iou3d_nms.hpp b/perception/tensorrt_bevdet/include/iou3d_nms.hpp
index 5907e966e0927..e58337f4ca2b4 100644
--- a/perception/tensorrt_bevdet/include/iou3d_nms.hpp
+++ b/perception/tensorrt_bevdet/include/iou3d_nms.hpp
@@ -5,6 +5,8 @@ All Rights Reserved 2019-2022.
 */
 #pragma once
 
+#include "common.hpp"
+
 #include <stdio.h>
 #include <thrust/count.h>
 #include <thrust/device_vector.h>
@@ -14,37 +16,33 @@ All Rights Reserved 2019-2022.
 #include <thrust/sort.h>
 #include <thrust/transform.h>
 
-#include <vector>
 #include <iostream>
-#include "common.hpp"
-
+#include <vector>
 
 const int THREADS_PER_BLOCK = 16;
 const int THREADS_PER_BLOCK_NMS = sizeof(unsigned long long) * 8;
 const float EPS = 1e-8;
 
-class Iou3dNmsCuda {
- public:
-  Iou3dNmsCuda(const int head_x_size,
-               const int head_y_size,
-               const float nms_overlap_thresh);
+class Iou3dNmsCuda
+{
+public:
+  Iou3dNmsCuda(const int head_x_size, const int head_y_size, const float nms_overlap_thresh);
   ~Iou3dNmsCuda() = default;
 
-  int DoIou3dNms(const int box_num_pre,
-                 const float* res_box,
-                 const int* res_sorted_indices,
-                 long* host_keep_data);
+  int DoIou3dNms(
+    const int box_num_pre, const float * res_box, const int * res_sorted_indices,
+    long * host_keep_data);
 
- private:
+private:
   const int head_x_size_;
   const int head_y_size_;
   const float nms_overlap_thresh_;
 };
 
+static const int kBoxBlockSize = 9;
 
-static const int kBoxBlockSize = 9;  
-
-struct Box {
+struct Box
+{
   float x;
   float y;
   float z;
@@ -57,4 +55,4 @@ struct Box {
   float score;
   int label;
   bool is_drop;  // for nms
-};
\ No newline at end of file
+};
diff --git a/perception/tensorrt_bevdet/include/nvjpegdecoder.hpp b/perception/tensorrt_bevdet/include/nvjpegdecoder.hpp
index be0fcbd995074..4e727893f259c 100644
--- a/perception/tensorrt_bevdet/include/nvjpegdecoder.hpp
+++ b/perception/tensorrt_bevdet/include/nvjpegdecoder.hpp
@@ -1,23 +1,22 @@
-#ifndef  __NVJPEGDECODER__
-#define __NVJPEGDECODER__
+#ifndef NVJPEGDECODER_HPP_
+#define NVJPEGDECODER_HPP_
 
 #ifdef __HAVE_NVJPEG__
-#include <iostream>
-#include <fstream>
-#include <sstream>
-#include <string>
-#include <vector>
-#include <algorithm>
-
-#include <string.h>
+#include "common.hpp"
 
 #include <dirent.h>
+#include <nvjpeg.h>
+#include <string.h>
 #include <sys/stat.h>
 #include <sys/types.h>
-#include <nvjpeg.h>
-#include <thread>
-#include "common.hpp"
 
+#include <algorithm>
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <thread>
+#include <vector>
 
 #define DECODE_RGB 3
 #define DECODE_BGR 4
@@ -25,77 +24,80 @@
 #define DECODE_BGRI 6
 #define DECODE_UNCHANGED 0
 
-
-#define CHECK_NVJPEG(call) { NvJpegAssert((call), __FILE__, __LINE__); }
-
-inline void NvJpegAssert(nvjpegStatus_t code, const char *file, int line){
-    if(code != NVJPEG_STATUS_SUCCESS){
-        std::cout << "NVJPEG failure: '#" << code << "' at " << std::string(file) << ":" << line << std::endl; 
-        exit(1);       
-    }
+#define CHECK_NVJPEG(call)                    \
+  {                                           \
+    NvJpegAssert((call), __FILE__, __LINE__); \
+  }
+
+inline void NvJpegAssert(nvjpegStatus_t code, const char * file, int line)
+{
+  if (code != NVJPEG_STATUS_SUCCESS) {
+    std::cout << "NVJPEG failure: '#" << code << "' at " << std::string(file) << ":" << line
+              << std::endl;
+    exit(1);
+  }
 }
 
+struct decode_params_t
+{
+  decode_params_t() {}
 
-struct decode_params_t{
-    decode_params_t(){}
+  cudaStream_t stream;
 
-    cudaStream_t stream;
-
-    // used with decoupled API
-    nvjpegBufferPinned_t pinned_buffers[2]; // 2 buffers for pipelining
-    nvjpegBufferDevice_t device_buffer;
-    nvjpegJpegStream_t jpeg_streams[2]; //  2 streams for pipelining
-
-    nvjpegDecodeParams_t nvjpeg_decode_params;
-    nvjpegJpegDecoder_t nvjpeg_decoder;
-    nvjpegJpegState_t nvjpeg_decoupled_state;
+  // used with decoupled API
+  nvjpegBufferPinned_t pinned_buffers[2];  // 2 buffers for pipelining
+  nvjpegBufferDevice_t device_buffer;
+  nvjpegJpegStream_t jpeg_streams[2];  //  2 streams for pipelining
 
+  nvjpegDecodeParams_t nvjpeg_decode_params;
+  nvjpegJpegDecoder_t nvjpeg_decoder;
+  nvjpegJpegState_t nvjpeg_decoupled_state;
 };
 
-struct share_params{
-    share_params(){
-        fmt = NVJPEG_OUTPUT_UNCHANGED;
-    }
-    share_params(size_t _fmt){
-        if(_fmt == DECODE_RGB || _fmt == DECODE_BGR || _fmt == DECODE_RGBI ||
-             _fmt == DECODE_BGRI || _fmt == DECODE_UNCHANGED){
-            fmt = (nvjpegOutputFormat_t)_fmt;
-        }
-        else{
-            std::cerr << "Unknown format! Auto switch BGR!" << std::endl;
-            fmt = NVJPEG_OUTPUT_BGR;
-        }
+struct share_params
+{
+  share_params() { fmt = NVJPEG_OUTPUT_UNCHANGED; }
+  share_params(size_t _fmt)
+  {
+    if (
+      _fmt == DECODE_RGB || _fmt == DECODE_BGR || _fmt == DECODE_RGBI || _fmt == DECODE_BGRI ||
+      _fmt == DECODE_UNCHANGED) {
+      fmt = (nvjpegOutputFormat_t)_fmt;
+    } else {
+      std::cerr << "Unknown format! Auto switch BGR!" << std::endl;
+      fmt = NVJPEG_OUTPUT_BGR;
     }
+  }
 
-    nvjpegOutputFormat_t fmt;
-    nvjpegJpegState_t nvjpeg_state;
-    nvjpegHandle_t nvjpeg_handle;
+  nvjpegOutputFormat_t fmt;
+  nvjpegJpegState_t nvjpeg_state;
+  nvjpegHandle_t nvjpeg_handle;
 
-    bool hw_decode_available;
+  bool hw_decode_available;
 };
 
-
-class nvjpegDecoder{
+class nvjpegDecoder
+{
 public:
-    nvjpegDecoder(){}
-    nvjpegDecoder(size_t n, size_t _fmt);
+  nvjpegDecoder() {}
+  nvjpegDecoder(size_t n, size_t _fmt);
 
-    int decode(const std::vector<std::vector<char>> &imgs_data, uchar* out_imgs);
-    int init();
-    ~nvjpegDecoder();
-private:
-    size_t N_img;
-    std::vector<nvjpegImage_t> iout;
-    std::vector<nvjpegImage_t> isz;
+  int decode(const std::vector<std::vector<char>> & imgs_data, uchar * out_imgs);
+  int init();
+  ~nvjpegDecoder();
 
-    std::vector<int> widths;
-    std::vector<int> heights;
+private:
+  size_t N_img;
+  std::vector<nvjpegImage_t> iout;
+  std::vector<nvjpegImage_t> isz;
 
-    share_params share_param;
-    std::vector<decode_params_t> params;
+  std::vector<int> widths;
+  std::vector<int> heights;
 
+  share_params share_param;
+  std::vector<decode_params_t> params;
 };
 
 #endif
 
-#endif
+#endif  // NVJPEGDECODER_HPP_
diff --git a/perception/tensorrt_bevdet/include/postprocess.hpp b/perception/tensorrt_bevdet/include/postprocess.hpp
index f89aef2057c0a..149b1c10dd78d 100644
--- a/perception/tensorrt_bevdet/include/postprocess.hpp
+++ b/perception/tensorrt_bevdet/include/postprocess.hpp
@@ -1,68 +1,58 @@
-#ifndef __POSTPROCESS_HPP__
-#define __POSTPROCESS_HPP__
+#ifndef POSTPROCESS_HPP_
+#define POSTPROCESS_HPP_
 
-#include <vector>
-
-#include "iou3d_nms.hpp"
 #include "common.hpp"
+#include "iou3d_nms.hpp"
 
+#include <vector>
 
-class PostprocessGPU{
+class PostprocessGPU
+{
 public:
-    PostprocessGPU(){};
-    PostprocessGPU(const int _class_num, 
-                    const float _score_thresh,
-                    const float _nms_thresh, 
-                    const int _nms_pre_maxnum,
-                    const int _nms_post_maxnum, 
-                    const int _down_sample, 
-                    const int _output_h, 
-                    const int _output_w, 
-                    const float _x_step, 
-                    const float _y_step,
-                    const float _x_start, 
-                    const float _y_start,
-                    const std::vector<int>& _class_num_pre_task,
-                    const std::vector<float>& _nms_rescale_factor);
-
-    void DoPostprocess(void ** const bev_buffer, std::vector<Box>& out_detections);
-    ~PostprocessGPU();
+  PostprocessGPU() {};
+  PostprocessGPU(
+    const int _class_num, const float _score_thresh, const float _nms_thresh,
+    const int _nms_pre_maxnum, const int _nms_post_maxnum, const int _down_sample,
+    const int _output_h, const int _output_w, const float _x_step, const float _y_step,
+    const float _x_start, const float _y_start, const std::vector<int> & _class_num_pre_task,
+    const std::vector<float> & _nms_rescale_factor);
 
+  void DoPostprocess(void ** const bev_buffer, std::vector<Box> & out_detections);
+  ~PostprocessGPU();
 
 private:
-    int class_num;
-    float score_thresh;
-    float nms_thresh;
-    int nms_pre_maxnum;
-    int nms_post_maxnum;
-    int down_sample;
-    int output_h;
-    int output_w;
-    float x_step;
-    float y_step;
-    float x_start;
-    float y_start;
-    int map_size;
-    int task_num;
-
-    std::vector<int> class_num_pre_task;
-    std::vector<float> nms_rescale_factor;
-
-    std::unique_ptr<Iou3dNmsCuda> iou3d_nms;
-
-    float* boxes_dev = nullptr;
-    float* score_dev = nullptr;
-    int* cls_dev = nullptr;
-    int* valid_box_num = nullptr;
-    int* sorted_indices_dev = nullptr;
-    long* keep_data_host = nullptr;
-    int* sorted_indices_host = nullptr;
-    float* boxes_host = nullptr;
-    float* score_host = nullptr;
-    int* cls_host = nullptr;
-
-    float* nms_rescale_factor_dev = nullptr;
-
+  int class_num;
+  float score_thresh;
+  float nms_thresh;
+  int nms_pre_maxnum;
+  int nms_post_maxnum;
+  int down_sample;
+  int output_h;
+  int output_w;
+  float x_step;
+  float y_step;
+  float x_start;
+  float y_start;
+  int map_size;
+  int task_num;
+
+  std::vector<int> class_num_pre_task;
+  std::vector<float> nms_rescale_factor;
+
+  std::unique_ptr<Iou3dNmsCuda> iou3d_nms;
+
+  float * boxes_dev = nullptr;
+  float * score_dev = nullptr;
+  int * cls_dev = nullptr;
+  int * valid_box_num = nullptr;
+  int * sorted_indices_dev = nullptr;
+  long * keep_data_host = nullptr;
+  int * sorted_indices_host = nullptr;
+  float * boxes_host = nullptr;
+  float * score_host = nullptr;
+  int * cls_host = nullptr;
+
+  float * nms_rescale_factor_dev = nullptr;
 };
 
-#endif
\ No newline at end of file
+#endif  // POSTPROCESS_HPP_
diff --git a/perception/tensorrt_bevdet/include/preprocess.hpp b/perception/tensorrt_bevdet/include/preprocess.hpp
index 68870bf0b5438..7f8b2cef96781 100644
--- a/perception/tensorrt_bevdet/include/preprocess.hpp
+++ b/perception/tensorrt_bevdet/include/preprocess.hpp
@@ -1,9 +1,8 @@
-#ifndef __PREPROCESS_HPP__
-#define __PREPROCESS_HPP__
+#ifndef PREPROCESS_HPP_
+#define PREPROCESS_HPP_
 
 #include "common.hpp"
 
-void convert_RGBHWC_to_BGRCHW(uchar *input, uchar *output, 
-                                                        int channels, int height, int width);
+void convert_RGBHWC_to_BGRCHW(uchar * input, uchar * output, int channels, int height, int width);
 
-#endif
\ No newline at end of file
+#endif  // PREPROCESS_HPP_
diff --git a/perception/tensorrt_bevdet/include/preprocess_plugin.hpp b/perception/tensorrt_bevdet/include/preprocess_plugin.hpp
index 5aa37c26f98e5..bd0ff8bf0b8c2 100644
--- a/perception/tensorrt_bevdet/include/preprocess_plugin.hpp
+++ b/perception/tensorrt_bevdet/include/preprocess_plugin.hpp
@@ -15,89 +15,105 @@
  */
 
 #include <NvInfer.h>
+#include <unistd.h>
+
 #include <cmath>
+#include <iostream>
 #include <string>
 #include <vector>
-#include <unistd.h>
-#include <iostream>
-
 
 namespace
 {
-static const char *PRE_PLUGIN_NAME {"Preprocess"};
-static const char *PRE_PLUGIN_VERSION {"1"};
-} // namespace
+static const char * PRE_PLUGIN_NAME{"Preprocess"};
+static const char * PRE_PLUGIN_VERSION{"1"};
+}  // namespace
 
 namespace nvinfer1
 {
 class PreprocessPlugin : public IPluginV2DynamicExt
 {
 private:
-    const std::string name_;
-    std::string       namespace_;
-    struct
-    {
-        int crop_h;
-        int crop_w;
-        float resize_radio;
-    } m_;
+  const std::string name_;
+  std::string namespace_;
+  struct
+  {
+    int crop_h;
+    int crop_w;
+    float resize_radio;
+  } m_;
 
 public:
-    PreprocessPlugin() = delete;
-    PreprocessPlugin(const std::string &name, int crop_h, int crop_w, float resize_radio);
-    PreprocessPlugin(const std::string &name, const void *buffer, size_t length);
-    ~PreprocessPlugin();
+  PreprocessPlugin() = delete;
+  PreprocessPlugin(const std::string & name, int crop_h, int crop_w, float resize_radio);
+  PreprocessPlugin(const std::string & name, const void * buffer, size_t length);
+  ~PreprocessPlugin();
 
-    // Method inherited from IPluginV2
-    const char *getPluginType() const noexcept override;
-    const char *getPluginVersion() const noexcept override;
-    int32_t     getNbOutputs() const noexcept override;
-    int32_t     initialize() noexcept override;
-    void        terminate() noexcept override;
-    size_t      getSerializationSize() const noexcept override;
-    void        serialize(void *buffer) const noexcept override;
-    void        destroy() noexcept override;
-    void        setPluginNamespace(const char *pluginNamespace) noexcept override;
-    const char *getPluginNamespace() const noexcept override;
+  // Method inherited from IPluginV2
+  const char * getPluginType() const noexcept override;
+  const char * getPluginVersion() const noexcept override;
+  int32_t getNbOutputs() const noexcept override;
+  int32_t initialize() noexcept override;
+  void terminate() noexcept override;
+  size_t getSerializationSize() const noexcept override;
+  void serialize(void * buffer) const noexcept override;
+  void destroy() noexcept override;
+  void setPluginNamespace(const char * pluginNamespace) noexcept override;
+  const char * getPluginNamespace() const noexcept override;
 
-    // Method inherited from IPluginV2Ext
-    DataType getOutputDataType(int32_t index, DataType const *inputTypes, int32_t nbInputs) const noexcept override;
-    void     attachToContext(cudnnContext *contextCudnn, cublasContext *contextCublas, IGpuAllocator *gpuAllocator) noexcept override;
-    void     detachFromContext() noexcept override;
+  // Method inherited from IPluginV2Ext
+  DataType getOutputDataType(
+    int32_t index, DataType const * inputTypes, int32_t nbInputs) const noexcept override;
+  void attachToContext(
+    cudnnContext * contextCudnn, cublasContext * contextCublas,
+    IGpuAllocator * gpuAllocator) noexcept override;
+  void detachFromContext() noexcept override;
 
-    // Method inherited from IPluginV2DynamicExt
-    IPluginV2DynamicExt *clone() const noexcept override;
-    DimsExprs            getOutputDimensions(int32_t outputIndex, const DimsExprs *inputs, int32_t nbInputs, IExprBuilder &exprBuilder) noexcept override;
-    bool                 supportsFormatCombination(int32_t pos, const PluginTensorDesc *inOut, int32_t nbInputs, int32_t nbOutputs) noexcept override;
-    void                 configurePlugin(const DynamicPluginTensorDesc *in, int32_t nbInputs, const DynamicPluginTensorDesc *out, int32_t nbOutputs) noexcept override;
-    size_t               getWorkspaceSize(const PluginTensorDesc *inputs, int32_t nbInputs, const PluginTensorDesc *outputs, int32_t nbOutputs) const noexcept override;
-    int32_t              enqueue(const PluginTensorDesc *inputDesc, const PluginTensorDesc *outputDesc, const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept override;
+  // Method inherited from IPluginV2DynamicExt
+  IPluginV2DynamicExt * clone() const noexcept override;
+  DimsExprs getOutputDimensions(
+    int32_t outputIndex, const DimsExprs * inputs, int32_t nbInputs,
+    IExprBuilder & exprBuilder) noexcept override;
+  bool supportsFormatCombination(
+    int32_t pos, const PluginTensorDesc * inOut, int32_t nbInputs,
+    int32_t nbOutputs) noexcept override;
+  void configurePlugin(
+    const DynamicPluginTensorDesc * in, int32_t nbInputs, const DynamicPluginTensorDesc * out,
+    int32_t nbOutputs) noexcept override;
+  size_t getWorkspaceSize(
+    const PluginTensorDesc * inputs, int32_t nbInputs, const PluginTensorDesc * outputs,
+    int32_t nbOutputs) const noexcept override;
+  int32_t enqueue(
+    const PluginTensorDesc * inputDesc, const PluginTensorDesc * outputDesc,
+    const void * const * inputs, void * const * outputs, void * workspace,
+    cudaStream_t stream) noexcept override;
 
 protected:
-    // To prevent compiler warnings
-    using nvinfer1::IPluginV2::enqueue;
-    using nvinfer1::IPluginV2::getOutputDimensions;
-    using nvinfer1::IPluginV2::getWorkspaceSize;
-    using nvinfer1::IPluginV2Ext::configurePlugin;
+  // To prevent compiler warnings
+  using nvinfer1::IPluginV2::enqueue;
+  using nvinfer1::IPluginV2::getOutputDimensions;
+  using nvinfer1::IPluginV2::getWorkspaceSize;
+  using nvinfer1::IPluginV2Ext::configurePlugin;
 };
 
 class PreprocessPluginCreator : public IPluginCreator
 {
 private:
-    static PluginFieldCollection    fc_;
-    static std::vector<PluginField> attr_;
-    std::string                     namespace_;
+  static PluginFieldCollection fc_;
+  static std::vector<PluginField> attr_;
+  std::string namespace_;
 
 public:
-    PreprocessPluginCreator();
-    ~PreprocessPluginCreator();
-    const char *                 getPluginName() const noexcept override;
-    const char *                 getPluginVersion() const noexcept override;
-    const PluginFieldCollection *getFieldNames() noexcept override;
-    IPluginV2DynamicExt *        createPlugin(const char *name, const PluginFieldCollection *fc) noexcept override;
-    IPluginV2DynamicExt *        deserializePlugin(const char *name, const void *serialData, size_t serialLength) noexcept override;
-    void                         setPluginNamespace(const char *pluginNamespace) noexcept override;
-    const char *                 getPluginNamespace() const noexcept override;
+  PreprocessPluginCreator();
+  ~PreprocessPluginCreator();
+  const char * getPluginName() const noexcept override;
+  const char * getPluginVersion() const noexcept override;
+  const PluginFieldCollection * getFieldNames() noexcept override;
+  IPluginV2DynamicExt * createPlugin(
+    const char * name, const PluginFieldCollection * fc) noexcept override;
+  IPluginV2DynamicExt * deserializePlugin(
+    const char * name, const void * serialData, size_t serialLength) noexcept override;
+  void setPluginNamespace(const char * pluginNamespace) noexcept override;
+  const char * getPluginNamespace() const noexcept override;
 };
 
-} // namespace nvinfer1
+}  // namespace nvinfer1
diff --git a/perception/tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml b/perception/tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml
index 494f812e5d43e..1cfe340596966 100644
--- a/perception/tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml
+++ b/perception/tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml
@@ -1,40 +1,40 @@
 <?xml version="1.0"?>
 <launch>
-    <!-- input topic name -->
-    <arg name="input/pointcloud" default="/lidar_top"/>
-    <arg name="input/img_fl" default="/cam_front_left/raw"/>
-    <arg name="input/img_f" default="/cam_front/raw"/>
-    <arg name="input/img_fr" default="/cam_front_right/raw"/>
-    <arg name="input/img_bl" default="/cam_back_left/raw"/>
-    <arg name="input/img_b" default="/cam_back/raw"/>
-    <arg name="input/img_br" default="/cam_back_right/raw"/>
-    <arg name="output/pointcloud" default="points_painting"/>
-    <arg name="output/boxes" default="boxes"/>
+  <!-- input topic name -->
+  <arg name="input/pointcloud" default="/lidar_top"/>
+  <arg name="input/img_fl" default="/cam_front_left/raw"/>
+  <arg name="input/img_f" default="/cam_front/raw"/>
+  <arg name="input/img_fr" default="/cam_front_right/raw"/>
+  <arg name="input/img_bl" default="/cam_back_left/raw"/>
+  <arg name="input/img_b" default="/cam_back/raw"/>
+  <arg name="input/img_br" default="/cam_back_right/raw"/>
+  <arg name="output/pointcloud" default="points_painting"/>
+  <arg name="output/boxes" default="boxes"/>
 
-    <arg name="onnx_file" default="$(find-pkg-share trt_bevdet)/data/bevdet_one_lt_d.onnx"/>
-    <arg name="engine_file" default="$(find-pkg-share trt_bevdet)/data/bevdet_one_lt_d.engine"/>
-    <arg name="cam_config" default="$(find-pkg-share trt_bevdet)/config/cams_param.yaml"/>
-    <arg name="model_config" default="$(find-pkg-share trt_bevdet)/config/bevdet_r50_4dlongterm_depth.yaml"/>
-    <arg name="param_file" default="$(find-pkg-share trt_bevdet)/config/bevdet.param.yaml"/>
+  <arg name="onnx_file" default="$(find-pkg-share trt_bevdet)/data/bevdet_one_lt_d.onnx"/>
+  <arg name="engine_file" default="$(find-pkg-share trt_bevdet)/data/bevdet_one_lt_d.engine"/>
+  <arg name="cam_config" default="$(find-pkg-share trt_bevdet)/config/cams_param.yaml"/>
+  <arg name="model_config" default="$(find-pkg-share trt_bevdet)/config/bevdet_r50_4dlongterm_depth.yaml"/>
+  <arg name="param_file" default="$(find-pkg-share trt_bevdet)/config/bevdet.param.yaml"/>
 
-    <!-- bevdet node -->
-    <node pkg="tensorrt_bevdet" exec="tensorrt_bevdet_node" name="tensorrt_bevdet_node" output="screen">
+  <!-- bevdet node -->
+  <node pkg="tensorrt_bevdet" exec="tensorrt_bevdet_node" name="tensorrt_bevdet_node" output="screen">
     <!-- ros topic name -->
-    <remap from="~/input/topic_cloud"  to="$(var input/pointcloud)" />
-    <remap from="~/input/topic_img_fl" to="$(var input/img_fl)" />
-    <remap from="~/input/topic_img_f"  to="$(var input/img_f)" />
-    <remap from="~/input/topic_img_fr" to="$(var input/img_fr)" />
-    <remap from="~/input/topic_img_bl" to="$(var input/img_bl)" />
-    <remap from="~/input/topic_img_b"  to="$(var input/img_b)" />
-    <remap from="~/input/topic_img_br" to="$(var input/img_br)" />
+    <remap from="~/input/topic_cloud" to="$(var input/pointcloud)"/>
+    <remap from="~/input/topic_img_fl" to="$(var input/img_fl)"/>
+    <remap from="~/input/topic_img_f" to="$(var input/img_f)"/>
+    <remap from="~/input/topic_img_fr" to="$(var input/img_fr)"/>
+    <remap from="~/input/topic_img_bl" to="$(var input/img_bl)"/>
+    <remap from="~/input/topic_img_b" to="$(var input/img_b)"/>
+    <remap from="~/input/topic_img_br" to="$(var input/img_br)"/>
 
-    <remap from="~/output/painting_cloud" to="$(var output/pointcloud)" />
-    <remap from="~/output/boxes" to="$(var output/boxes)" />
+    <remap from="~/output/painting_cloud" to="$(var output/pointcloud)"/>
+    <remap from="~/output/boxes" to="$(var output/boxes)"/>
 
-    <param name="onnx_file" value="$(var onnx_file)" />
-    <param name="engine_file" value="$(var engine_file)" />
-    <param name="cam_config" value="$(var cam_config)" />
-    <param name="model_config" value="$(var model_config)" />
+    <param name="onnx_file" value="$(var onnx_file)"/>
+    <param name="engine_file" value="$(var engine_file)"/>
+    <param name="cam_config" value="$(var cam_config)"/>
+    <param name="model_config" value="$(var model_config)"/>
     <param from="$(var param_file)"/>
-    </node>
-</launch>
\ No newline at end of file
+  </node>
+</launch>
diff --git a/perception/tensorrt_bevdet/package.xml b/perception/tensorrt_bevdet/package.xml
index 069a41ae31318..ba75862e6ff2f 100644
--- a/perception/tensorrt_bevdet/package.xml
+++ b/perception/tensorrt_bevdet/package.xml
@@ -18,18 +18,17 @@
   <buildtool_export_depend>tensorrt_cmake_module</buildtool_export_depend>
 
   <depend>autoware_perception_msgs</depend>
-  <depend>yaml-cpp</depend>
   <depend>cv_bridge</depend>
+  <depend>geometry_msgs</depend>
   <depend>libopencv-dev</depend>
   <depend>pcl_conversions</depend>
   <depend>pcl_ros</depend>
   <depend>rclcpp</depend>
   <depend>rclcpp_components</depend>
   <depend>sensor_msgs</depend>
-  <depend>geometry_msgs</depend>
-  <depend>tf2_geometry_msgs</depend>
   <depend>tensorrt_common</depend>
-
+  <depend>tf2_geometry_msgs</depend>
+  <depend>yaml-cpp</depend>
 
   <test_depend>ament_lint_auto</test_depend>
   <test_depend>autoware_lint_common</test_depend>
@@ -37,4 +36,4 @@
   <export>
     <build_type>ament_cmake</build_type>
   </export>
-</package>
\ No newline at end of file
+</package>
diff --git a/perception/tensorrt_bevdet/src/alignbev_plugin.cu b/perception/tensorrt_bevdet/src/alignbev_plugin.cu
index a7a56c6b9806d..444de602af910 100644
--- a/perception/tensorrt_bevdet/src/alignbev_plugin.cu
+++ b/perception/tensorrt_bevdet/src/alignbev_plugin.cu
@@ -20,315 +20,333 @@
 #include <cuda_fp16.h>
 #include <cuda_runtime_api.h>
 
-
-static inline __device__ bool within_bounds_2d(int h, int w, int H, int W){
-    return h >= 0 && h < H && w >= 0 && w < W;
+static inline __device__ bool within_bounds_2d(int h, int w, int H, int W)
+{
+  return h >= 0 && h < H && w >= 0 && w < W;
 }
 
 template <typename T1, typename T2>
-__global__ void align_bev_kernel(const int nthreads, const T1 *input,
-                                       const float *trans, T2 *output,
-                                       TensorDesc output_desc){
-    int C = output_desc.shape[1];         // 80
-    int out_H = output_desc.shape[2];     // 128
-    int out_W = output_desc.shape[3];     // 128
-    int out_sN = output_desc.stride[0];   // 80 * 128 * 128
-    int out_sC = output_desc.stride[1];   // 128 * 128
-    int out_sH = output_desc.stride[2];   // 128
-    int out_sW = output_desc.stride[3];   // 1
-
-    CUDA_1D_KERNEL_LOOP(index, nthreads){
-        const int w = index % out_W;               //  j
-        const int h = (index / out_W) % out_H;     //  i
-        const int n = index / (out_H * out_W);     // batch
-
-        float ix = trans[n * 6 + 0 * 3 + 0] * w + 
-                   trans[n * 6 + 0 * 3 + 1] * h + 
-                   trans[n * 6 + 0 * 3 + 2];
-        float iy = trans[n * 6 + 1 * 3 + 0] * w + 
-                   trans[n * 6 + 1 * 3 + 1] * h + 
-                   trans[n * 6 + 1 * 3 + 2];
-
-        // NE, NW, SE, SW point
-        int ix_nw = static_cast<int>(::floor(ix));
-        int iy_nw = static_cast<int>(::floor(iy));
-        int ix_ne = ix_nw + 1;
-        int iy_ne = iy_nw;
-        int ix_sw = ix_nw;
-        int iy_sw = iy_nw + 1;
-        int ix_se = ix_nw + 1;
-        int iy_se = iy_nw + 1;
-
-        T2 nw = (ix_se - ix) * (iy_se - iy);
-        T2 ne = (ix - ix_sw) * (iy_sw - iy);
-        T2 sw = (ix_ne - ix) * (iy - iy_ne);
-        T2 se = (ix - ix_nw) * (iy - iy_nw);
-
-        // bilinear
-        auto inp_ptr_NC = input + n * out_sN;
-        auto out_ptr_NCHW = output + n * out_sN + h * out_sH + w * out_sW;
-        for (int c = 0; c < C; ++c, inp_ptr_NC += out_sC, out_ptr_NCHW += out_sC){
-            *out_ptr_NCHW = static_cast<T2>(0);
-            if (within_bounds_2d(iy_nw, ix_nw, out_H, out_W)){
-                *out_ptr_NCHW += static_cast<T2>(inp_ptr_NC[iy_nw * out_sH + ix_nw * out_sW]) * nw;
-            }
-            if (within_bounds_2d(iy_ne, ix_ne, out_H, out_W)){
-                *out_ptr_NCHW += static_cast<T2>(inp_ptr_NC[iy_ne * out_sH + ix_ne * out_sW]) * ne;
-            }
-            if (within_bounds_2d(iy_sw, ix_sw, out_H, out_W)){
-                *out_ptr_NCHW += static_cast<T2>(inp_ptr_NC[iy_sw * out_sH + ix_sw * out_sW]) * sw;
-            }
-            if (within_bounds_2d(iy_se, ix_se, out_H, out_W)){
-                *out_ptr_NCHW += static_cast<T2>(inp_ptr_NC[iy_se * out_sH + ix_se * out_sW]) * se;
-            }
-        }
+__global__ void align_bev_kernel(
+  const int nthreads, const T1 * input, const float * trans, T2 * output, TensorDesc output_desc)
+{
+  int C = output_desc.shape[1];        // 80
+  int out_H = output_desc.shape[2];    // 128
+  int out_W = output_desc.shape[3];    // 128
+  int out_sN = output_desc.stride[0];  // 80 * 128 * 128
+  int out_sC = output_desc.stride[1];  // 128 * 128
+  int out_sH = output_desc.stride[2];  // 128
+  int out_sW = output_desc.stride[3];  // 1
+
+  CUDA_1D_KERNEL_LOOP(index, nthreads)
+  {
+    const int w = index % out_W;            //  j
+    const int h = (index / out_W) % out_H;  //  i
+    const int n = index / (out_H * out_W);  // batch
+
+    float ix =
+      trans[n * 6 + 0 * 3 + 0] * w + trans[n * 6 + 0 * 3 + 1] * h + trans[n * 6 + 0 * 3 + 2];
+    float iy =
+      trans[n * 6 + 1 * 3 + 0] * w + trans[n * 6 + 1 * 3 + 1] * h + trans[n * 6 + 1 * 3 + 2];
+
+    // NE, NW, SE, SW point
+    int ix_nw = static_cast<int>(::floor(ix));
+    int iy_nw = static_cast<int>(::floor(iy));
+    int ix_ne = ix_nw + 1;
+    int iy_ne = iy_nw;
+    int ix_sw = ix_nw;
+    int iy_sw = iy_nw + 1;
+    int ix_se = ix_nw + 1;
+    int iy_se = iy_nw + 1;
+
+    T2 nw = (ix_se - ix) * (iy_se - iy);
+    T2 ne = (ix - ix_sw) * (iy_sw - iy);
+    T2 sw = (ix_ne - ix) * (iy - iy_ne);
+    T2 se = (ix - ix_nw) * (iy - iy_nw);
+
+    // bilinear
+    auto inp_ptr_NC = input + n * out_sN;
+    auto out_ptr_NCHW = output + n * out_sN + h * out_sH + w * out_sW;
+    for (int c = 0; c < C; ++c, inp_ptr_NC += out_sC, out_ptr_NCHW += out_sC) {
+      *out_ptr_NCHW = static_cast<T2>(0);
+      if (within_bounds_2d(iy_nw, ix_nw, out_H, out_W)) {
+        *out_ptr_NCHW += static_cast<T2>(inp_ptr_NC[iy_nw * out_sH + ix_nw * out_sW]) * nw;
+      }
+      if (within_bounds_2d(iy_ne, ix_ne, out_H, out_W)) {
+        *out_ptr_NCHW += static_cast<T2>(inp_ptr_NC[iy_ne * out_sH + ix_ne * out_sW]) * ne;
+      }
+      if (within_bounds_2d(iy_sw, ix_sw, out_H, out_W)) {
+        *out_ptr_NCHW += static_cast<T2>(inp_ptr_NC[iy_sw * out_sH + ix_sw * out_sW]) * sw;
+      }
+      if (within_bounds_2d(iy_se, ix_se, out_H, out_W)) {
+        *out_ptr_NCHW += static_cast<T2>(inp_ptr_NC[iy_se * out_sH + ix_se * out_sW]) * se;
+      }
     }
+  }
 }
 
-void create_desc(const int *dims, int nb_dims, TensorDesc &desc){
-    memcpy(&desc.shape[0], dims, sizeof(int) * nb_dims);
-    desc.stride[nb_dims - 1] = 1;
-    for (int i = nb_dims - 2; i >= 0; --i){
-        desc.stride[i] = desc.stride[i + 1] * desc.shape[i + 1];
-    }
+void create_desc(const int * dims, int nb_dims, TensorDesc & desc)
+{
+  memcpy(&desc.shape[0], dims, sizeof(int) * nb_dims);
+  desc.stride[nb_dims - 1] = 1;
+  for (int i = nb_dims - 2; i >= 0; --i) {
+    desc.stride[i] = desc.stride[i + 1] * desc.shape[i + 1];
+  }
 }
 
-
-namespace nvinfer1 {
+namespace nvinfer1
+{
 // class AlignBEVPlugin
-AlignBEVPlugin::AlignBEVPlugin(const std::string &name):
-    name_(name){
+AlignBEVPlugin::AlignBEVPlugin(const std::string & name) : name_(name)
+{
 }
 
-AlignBEVPlugin::AlignBEVPlugin(const std::string &name, const void *buffer, size_t length):
-    name_(name){
-    memcpy(&m_, buffer, sizeof(m_));
+AlignBEVPlugin::AlignBEVPlugin(const std::string & name, const void * buffer, size_t length)
+: name_(name)
+{
+  memcpy(&m_, buffer, sizeof(m_));
 }
 
-AlignBEVPlugin::~AlignBEVPlugin(){
+AlignBEVPlugin::~AlignBEVPlugin()
+{
 }
 
-IPluginV2DynamicExt *AlignBEVPlugin::clone() const noexcept {
-    auto p = new AlignBEVPlugin(name_, &m_, sizeof(m_));
-    p->setPluginNamespace(namespace_.c_str());
-    return p;
+IPluginV2DynamicExt * AlignBEVPlugin::clone() const noexcept
+{
+  auto p = new AlignBEVPlugin(name_, &m_, sizeof(m_));
+  p->setPluginNamespace(namespace_.c_str());
+  return p;
 }
 
-int32_t AlignBEVPlugin::getNbOutputs() const noexcept {
-    return 1;
+int32_t AlignBEVPlugin::getNbOutputs() const noexcept
+{
+  return 1;
 }
- 
-DataType AlignBEVPlugin::getOutputDataType(int32_t index, DataType const *inputTypes, 
-                                                                int32_t nbInputs) const noexcept {
-    return DataType::kFLOAT; // FIXME 
+
+DataType AlignBEVPlugin::getOutputDataType(
+  int32_t index, DataType const * inputTypes, int32_t nbInputs) const noexcept
+{
+  return DataType::kFLOAT;  // FIXME
 }
 
-DimsExprs AlignBEVPlugin::getOutputDimensions(int32_t outputIndex, const DimsExprs *inputs, 
-                                        int32_t nbInputs, IExprBuilder &exprBuilder) noexcept {
-    
-    return inputs[0]; 
+DimsExprs AlignBEVPlugin::getOutputDimensions(
+  int32_t outputIndex, const DimsExprs * inputs, int32_t nbInputs,
+  IExprBuilder & exprBuilder) noexcept
+{
+  return inputs[0];
 }
 
-bool AlignBEVPlugin::supportsFormatCombination(int32_t pos, const PluginTensorDesc *inOut,
-                                                    int32_t nbInputs, int32_t nbOutputs) noexcept {
-    // adj_feat  
-    if(pos == 0){
-        return (inOut[pos].type == DataType::kFLOAT || inOut[pos].type == DataType::kHALF) &&
-                inOut[pos].format == TensorFormat::kLINEAR;
-    }    
-    else if(pos == 2){ // out
-        return (inOut[pos].type == DataType::kFLOAT || inOut[pos].type == DataType::kHALF) &&
-                inOut[pos].format == TensorFormat::kLINEAR;
-    }
-    else if(pos == 1){ // transform
-        return inOut[pos].type == DataType::kFLOAT && inOut[pos].format == TensorFormat::kLINEAR;
-    }
-    return false;
+bool AlignBEVPlugin::supportsFormatCombination(
+  int32_t pos, const PluginTensorDesc * inOut, int32_t nbInputs, int32_t nbOutputs) noexcept
+{
+  // adj_feat
+  if (pos == 0) {
+    return (inOut[pos].type == DataType::kFLOAT || inOut[pos].type == DataType::kHALF) &&
+           inOut[pos].format == TensorFormat::kLINEAR;
+  } else if (pos == 2) {  // out
+    return (inOut[pos].type == DataType::kFLOAT || inOut[pos].type == DataType::kHALF) &&
+           inOut[pos].format == TensorFormat::kLINEAR;
+  } else if (pos == 1) {  // transform
+    return inOut[pos].type == DataType::kFLOAT && inOut[pos].format == TensorFormat::kLINEAR;
+  }
+  return false;
 }
 
-size_t AlignBEVPlugin::getWorkspaceSize(const PluginTensorDesc *inputs, int32_t nbInputs, 
-                                const PluginTensorDesc *outputs, int32_t nbOutputs) const noexcept {
-    return 0;
+size_t AlignBEVPlugin::getWorkspaceSize(
+  const PluginTensorDesc * inputs, int32_t nbInputs, const PluginTensorDesc * outputs,
+  int32_t nbOutputs) const noexcept
+{
+  return 0;
 }
 
-int32_t AlignBEVPlugin::enqueue(const PluginTensorDesc *inputDesc, const PluginTensorDesc *outputDesc,
-    const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept {
-    
-    // TODO
-    // inputs[0] == adj_feat  b x 8 x 80 x 128 x 128
-    // inputs[1] == transform b x 8 x 6
+int32_t AlignBEVPlugin::enqueue(
+  const PluginTensorDesc * inputDesc, const PluginTensorDesc * outputDesc,
+  const void * const * inputs, void * const * outputs, void * workspace,
+  cudaStream_t stream) noexcept
+{
+  // TODO
+  // inputs[0] == adj_feat  b x 8 x 80 x 128 x 128
+  // inputs[1] == transform b x 8 x 6
 
-    int bev_channel = inputDesc[0].dims.d[2];
-    int bev_h = inputDesc[0].dims.d[3];
-    int bev_w = inputDesc[0].dims.d[4];
-    int adj_num = inputDesc[0].dims.d[1];
+  int bev_channel = inputDesc[0].dims.d[2];
+  int bev_h = inputDesc[0].dims.d[3];
+  int bev_w = inputDesc[0].dims.d[4];
+  int adj_num = inputDesc[0].dims.d[1];
 
-    int output_dim[4] = {8, bev_channel, bev_h, bev_w};
+  int output_dim[4] = {8, bev_channel, bev_h, bev_w};
 
-    TensorDesc output_desc;
-    create_desc(output_dim, 4, output_desc);
+  TensorDesc output_desc;
+  create_desc(output_dim, 4, output_desc);
 
-    int count = 1;
-    for (int i = 0; i < 4; ++i){
-        if (i == 1){
-            continue;
-        }
-        count *= output_desc.shape[i];
+  int count = 1;
+  for (int i = 0; i < 4; ++i) {
+    if (i == 1) {
+      continue;
     }
+    count *= output_desc.shape[i];
+  }
 
-    switch (int(outputDesc[0].type))
-    {
+  switch (int(outputDesc[0].type)) {
     case int(DataType::kFLOAT):
-        if(inputDesc[0].type == DataType::kFLOAT){
-            // printf("align : fp32, fp32\n");
-            align_bev_kernel<float, float><<<GET_BLOCKS(count), NUM_THREADS, 0, stream>>>(
-                                                    count, 
-                                                    reinterpret_cast<const float *>(inputs[0]), 
-                                                    reinterpret_cast<const float*>(inputs[1]), 
-                                                    reinterpret_cast<float *>(outputs[0]), 
-                                                    output_desc);
-        }
-        else{
-            // printf("align : fp16, fp32\n");
-            align_bev_kernel<__half, float><<<GET_BLOCKS(count), NUM_THREADS, 0, stream>>>(
-                                                    count, 
-                                                    reinterpret_cast<const __half *>(inputs[0]), 
-                                                    reinterpret_cast<const float*>(inputs[1]), 
-                                                    reinterpret_cast<float *>(outputs[0]), 
-                                                    output_desc);
-        }
-        break;
+      if (inputDesc[0].type == DataType::kFLOAT) {
+        // printf("align : fp32, fp32\n");
+        align_bev_kernel<float, float><<<GET_BLOCKS(count), NUM_THREADS, 0, stream>>>(
+          count, reinterpret_cast<const float *>(inputs[0]),
+          reinterpret_cast<const float *>(inputs[1]), reinterpret_cast<float *>(outputs[0]),
+          output_desc);
+      } else {
+        // printf("align : fp16, fp32\n");
+        align_bev_kernel<__half, float><<<GET_BLOCKS(count), NUM_THREADS, 0, stream>>>(
+          count, reinterpret_cast<const __half *>(inputs[0]),
+          reinterpret_cast<const float *>(inputs[1]), reinterpret_cast<float *>(outputs[0]),
+          output_desc);
+      }
+      break;
     case int(DataType::kHALF):
-        if(inputDesc[0].type == DataType::kFLOAT){
-            // printf("align : fp32, fp16\n");
-            align_bev_kernel<float, __half><<<GET_BLOCKS(count), NUM_THREADS, 0, stream>>>(
-                                                    count, 
-                                                    reinterpret_cast<const float *>(inputs[0]), 
-                                                    reinterpret_cast<const float*>(inputs[1]), 
-                                                    reinterpret_cast<__half *>(outputs[0]), 
-                                                    output_desc);
-        }
-        else{
-            // printf("align : fp16, fp16\n");
-            align_bev_kernel<__half, __half><<<GET_BLOCKS(count), NUM_THREADS, 0, stream>>>(
-                                                    count, 
-                                                    reinterpret_cast<const __half *>(inputs[0]), 
-                                                    reinterpret_cast<const float*>(inputs[1]), 
-                                                    reinterpret_cast<__half *>(outputs[0]), 
-                                                    output_desc);
-        }
-        break;
-    default: // should NOT be here
-        printf("\tUnsupport datatype!\n");
-    }
+      if (inputDesc[0].type == DataType::kFLOAT) {
+        // printf("align : fp32, fp16\n");
+        align_bev_kernel<float, __half><<<GET_BLOCKS(count), NUM_THREADS, 0, stream>>>(
+          count, reinterpret_cast<const float *>(inputs[0]),
+          reinterpret_cast<const float *>(inputs[1]), reinterpret_cast<__half *>(outputs[0]),
+          output_desc);
+      } else {
+        // printf("align : fp16, fp16\n");
+        align_bev_kernel<__half, __half><<<GET_BLOCKS(count), NUM_THREADS, 0, stream>>>(
+          count, reinterpret_cast<const __half *>(inputs[0]),
+          reinterpret_cast<const float *>(inputs[1]), reinterpret_cast<__half *>(outputs[0]),
+          output_desc);
+      }
+      break;
+    default:  // should NOT be here
+      printf("\tUnsupport datatype!\n");
+  }
 
-
-    return 0;
+  return 0;
 }
 
-void AlignBEVPlugin::destroy() noexcept {
-    delete this;
-    return;
+void AlignBEVPlugin::destroy() noexcept
+{
+  delete this;
+  return;
 }
 
-void AlignBEVPlugin::configurePlugin(const DynamicPluginTensorDesc *in, int32_t nbInputs, 
-                                    const DynamicPluginTensorDesc *out, int32_t nbOutputs) noexcept {
-    return;
+void AlignBEVPlugin::configurePlugin(
+  const DynamicPluginTensorDesc * in, int32_t nbInputs, const DynamicPluginTensorDesc * out,
+  int32_t nbOutputs) noexcept
+{
+  return;
 }
 
-int32_t AlignBEVPlugin::initialize() noexcept {
-    return 0;
+int32_t AlignBEVPlugin::initialize() noexcept
+{
+  return 0;
 }
 
-void AlignBEVPlugin::terminate() noexcept {
-    return;
+void AlignBEVPlugin::terminate() noexcept
+{
+  return;
 }
 
-size_t AlignBEVPlugin::getSerializationSize() const noexcept {
-    return sizeof(m_);
+size_t AlignBEVPlugin::getSerializationSize() const noexcept
+{
+  return sizeof(m_);
 }
 
-void AlignBEVPlugin::serialize(void *buffer) const noexcept {
-    memcpy(buffer, &m_, sizeof(m_));
-    return;
+void AlignBEVPlugin::serialize(void * buffer) const noexcept
+{
+  memcpy(buffer, &m_, sizeof(m_));
+  return;
 }
 
-void AlignBEVPlugin::setPluginNamespace(const char *pluginNamespace) noexcept {
-    namespace_ = pluginNamespace;
-    return;
+void AlignBEVPlugin::setPluginNamespace(const char * pluginNamespace) noexcept
+{
+  namespace_ = pluginNamespace;
+  return;
 }
 
-const char *AlignBEVPlugin::getPluginNamespace() const noexcept {
-    return namespace_.c_str();
+const char * AlignBEVPlugin::getPluginNamespace() const noexcept
+{
+  return namespace_.c_str();
 }
 
-const char *AlignBEVPlugin::getPluginType() const noexcept {
-    return ALIGN_PLUGIN_NAME;
+const char * AlignBEVPlugin::getPluginType() const noexcept
+{
+  return ALIGN_PLUGIN_NAME;
 }
 
-const char *AlignBEVPlugin::getPluginVersion() const noexcept {
-    return ALIGN_PLUGIN_VERSION;
+const char * AlignBEVPlugin::getPluginVersion() const noexcept
+{
+  return ALIGN_PLUGIN_VERSION;
 }
 
-void AlignBEVPlugin::attachToContext(cudnnContext *contextCudnn, cublasContext *contextCublas, 
-                                                        IGpuAllocator *gpuAllocator) noexcept {
-    return;
+void AlignBEVPlugin::attachToContext(
+  cudnnContext * contextCudnn, cublasContext * contextCublas, IGpuAllocator * gpuAllocator) noexcept
+{
+  return;
 }
 
-void AlignBEVPlugin::detachFromContext() noexcept {
-    return;
+void AlignBEVPlugin::detachFromContext() noexcept
+{
+  return;
 }
 
 // class AlignBEVPluginCreator
-PluginFieldCollection    AlignBEVPluginCreator::fc_ {};
+PluginFieldCollection AlignBEVPluginCreator::fc_{};
 std::vector<PluginField> AlignBEVPluginCreator::attr_;
 
-AlignBEVPluginCreator::AlignBEVPluginCreator() {
-    attr_.clear();
-    fc_.nbFields = attr_.size();
-    fc_.fields   = attr_.data();
+AlignBEVPluginCreator::AlignBEVPluginCreator()
+{
+  attr_.clear();
+  fc_.nbFields = attr_.size();
+  fc_.fields = attr_.data();
 }
 
-AlignBEVPluginCreator::~AlignBEVPluginCreator() {
+AlignBEVPluginCreator::~AlignBEVPluginCreator()
+{
 }
 
-
-IPluginV2DynamicExt *AlignBEVPluginCreator::createPlugin(const char *name, 
-                                    const PluginFieldCollection *fc) noexcept {
-    // const PluginField *fields = fc->fields;
-    AlignBEVPlugin *pObj = new AlignBEVPlugin(name);
-    pObj->setPluginNamespace(namespace_.c_str());
-    return pObj;
+IPluginV2DynamicExt * AlignBEVPluginCreator::createPlugin(
+  const char * name, const PluginFieldCollection * fc) noexcept
+{
+  // const PluginField *fields = fc->fields;
+  AlignBEVPlugin * pObj = new AlignBEVPlugin(name);
+  pObj->setPluginNamespace(namespace_.c_str());
+  return pObj;
 }
 
-IPluginV2DynamicExt *AlignBEVPluginCreator::deserializePlugin(const char *name, 
-                                        const void *serialData, size_t serialLength) noexcept {
-    AlignBEVPlugin *pObj = new AlignBEVPlugin(name, serialData, serialLength);
-    pObj->setPluginNamespace(namespace_.c_str());
-    return pObj;
+IPluginV2DynamicExt * AlignBEVPluginCreator::deserializePlugin(
+  const char * name, const void * serialData, size_t serialLength) noexcept
+{
+  AlignBEVPlugin * pObj = new AlignBEVPlugin(name, serialData, serialLength);
+  pObj->setPluginNamespace(namespace_.c_str());
+  return pObj;
 }
 
-void AlignBEVPluginCreator::setPluginNamespace(const char *pluginNamespace) noexcept {
-    namespace_ = pluginNamespace;
-    return;
+void AlignBEVPluginCreator::setPluginNamespace(const char * pluginNamespace) noexcept
+{
+  namespace_ = pluginNamespace;
+  return;
 }
 
-const char *AlignBEVPluginCreator::getPluginNamespace() const noexcept {
-    return namespace_.c_str();
+const char * AlignBEVPluginCreator::getPluginNamespace() const noexcept
+{
+  return namespace_.c_str();
 }
 
-const char *AlignBEVPluginCreator::getPluginName() const noexcept {
-    return ALIGN_PLUGIN_NAME;
+const char * AlignBEVPluginCreator::getPluginName() const noexcept
+{
+  return ALIGN_PLUGIN_NAME;
 }
 
-const char *AlignBEVPluginCreator::getPluginVersion() const noexcept {
-    return ALIGN_PLUGIN_VERSION;
+const char * AlignBEVPluginCreator::getPluginVersion() const noexcept
+{
+  return ALIGN_PLUGIN_VERSION;
 }
 
-const PluginFieldCollection *AlignBEVPluginCreator::getFieldNames() noexcept {
-    return &fc_;
+const PluginFieldCollection * AlignBEVPluginCreator::getFieldNames() noexcept
+{
+  return &fc_;
 }
 
 REGISTER_TENSORRT_PLUGIN(AlignBEVPluginCreator);
 
-} // namespace nvinfer1
+}  // namespace nvinfer1
diff --git a/perception/tensorrt_bevdet/src/bevdet.cpp b/perception/tensorrt_bevdet/src/bevdet.cpp
index 8d50bbcbf92c4..c258e2dd6db02 100644
--- a/perception/tensorrt_bevdet/src/bevdet.cpp
+++ b/perception/tensorrt_bevdet/src/bevdet.cpp
@@ -1,757 +1,700 @@
-#include <iostream>
-#include <cstdio>
-#include <fstream>
-#include <chrono>
-
-#include <thrust/sort.h>
-#include <thrust/functional.h>
-
 #include "bevdet.hpp"
 
 #include "alignbev_plugin.hpp"
 #include "bevpool_plugin.hpp"
-#include "preprocess_plugin.hpp"
 #include "gatherbev_plugin.hpp"
+#include "preprocess_plugin.hpp"
 
-using std::chrono::duration;
-using std::chrono::high_resolution_clock;
-
-
-BEVDet::BEVDet(const std::string &config_file, int n_img,               
-               std::vector<Eigen::Matrix3f> _cams_intrin, 
-               std::vector<Eigen::Quaternion<float>> _cams2ego_rot, 
-               std::vector<Eigen::Translation3f> _cams2ego_trans,
-               const std::string &onnx_file,
-               const std::string &engine_file) : 
-               cams_intrin(_cams_intrin), 
-               cams2ego_rot(_cams2ego_rot), 
-               cams2ego_trans(_cams2ego_trans){
+#include <thrust/functional.h>
+#include <thrust/sort.h>
 
-    InitParams(config_file);
-    if(n_img != N_img){
-        printf("BEVDet need %d images, but given %d images!", N_img, n_img);
-    }
-    auto start = high_resolution_clock::now();
-
-    std::shared_ptr<int> ranks_bev_ptr = nullptr;
-    std::shared_ptr<int> ranks_depth_ptr = nullptr;
-    std::shared_ptr<int> ranks_feat_ptr = nullptr;
-    std::shared_ptr<int> interval_starts_ptr = nullptr;
-    std::shared_ptr<int> interval_lengths_ptr = nullptr;
-
-
-    InitViewTransformer(ranks_bev_ptr, ranks_depth_ptr, ranks_feat_ptr, 
-                                interval_starts_ptr, interval_lengths_ptr);
-    auto end = high_resolution_clock::now();
-    duration<float> t = end - start;
-    printf("InitVewTransformer cost time : %.4lf ms\n", t.count() * 1000);
-
-    if (access(engine_file.c_str(), F_OK) == 0)
-    {
-        printf("Inference engine prepared.");
-    } 
-    else
-    {
-        //onnx to engine
-        printf("Could not find %s, try making TensorRT engine from onnx", engine_file.c_str());
-        ExportEngine(onnx_file, engine_file);
-    }
-    InitEngine(engine_file); // FIXME
-    MallocDeviceMemory();
+#include <chrono>
+#include <cstdio>
+#include <fstream>
+#include <iostream>
 
-    if(use_adj){
-        adj_frame_ptr.reset(new adjFrame(adj_num, 
-                                        trt_buffer_sizes[buffer_map["curr_bevfeat"]]));
-    }
+using std::chrono::duration;
+using std::chrono::high_resolution_clock;
 
-    cam_params_host = new float[N_img * cam_params_size];
-    
-    CHECK_CUDA(cudaMemcpy(trt_buffer_dev[buffer_map["ranks_bev"]], ranks_bev_ptr.get(), 
-                                        valid_feat_num * sizeof(int), cudaMemcpyHostToDevice));
-    CHECK_CUDA(cudaMemcpy(trt_buffer_dev[buffer_map["ranks_depth"]], ranks_depth_ptr.get(), 
-                                        valid_feat_num * sizeof(int), cudaMemcpyHostToDevice));
-    CHECK_CUDA(cudaMemcpy(trt_buffer_dev[buffer_map["ranks_feat"]], ranks_feat_ptr.get(), 
-                                        valid_feat_num * sizeof(int), cudaMemcpyHostToDevice));
-    CHECK_CUDA(cudaMemcpy(trt_buffer_dev[buffer_map["interval_starts"]], interval_starts_ptr.get(), 
-                                        unique_bev_num * sizeof(int), cudaMemcpyHostToDevice));
-    CHECK_CUDA(cudaMemcpy(trt_buffer_dev[buffer_map["interval_lengths"]], interval_lengths_ptr.get(), 
-                                        unique_bev_num * sizeof(int), cudaMemcpyHostToDevice));
-
-    CHECK_CUDA(cudaMemcpy(trt_buffer_dev[buffer_map["mean"]], mean.data(), 3 * sizeof(float),
-                                                                    cudaMemcpyHostToDevice));
-    CHECK_CUDA(cudaMemcpy(trt_buffer_dev[buffer_map["std"]], std.data(), 3 * sizeof(float),
-                                                                    cudaMemcpyHostToDevice));                                                     
+BEVDet::BEVDet(
+  const std::string & config_file, int n_img, std::vector<Eigen::Matrix3f> _cams_intrin,
+  std::vector<Eigen::Quaternion<float>> _cams2ego_rot,
+  std::vector<Eigen::Translation3f> _cams2ego_trans, const std::string & onnx_file,
+  const std::string & engine_file)
+: cams_intrin(_cams_intrin), cams2ego_rot(_cams2ego_rot), cams2ego_trans(_cams2ego_trans)
+{
+  InitParams(config_file);
+  if (n_img != N_img) {
+    printf("BEVDet need %d images, but given %d images!", N_img, n_img);
+  }
+  auto start = high_resolution_clock::now();
+
+  std::shared_ptr<int> ranks_bev_ptr = nullptr;
+  std::shared_ptr<int> ranks_depth_ptr = nullptr;
+  std::shared_ptr<int> ranks_feat_ptr = nullptr;
+  std::shared_ptr<int> interval_starts_ptr = nullptr;
+  std::shared_ptr<int> interval_lengths_ptr = nullptr;
+
+  InitViewTransformer(
+    ranks_bev_ptr, ranks_depth_ptr, ranks_feat_ptr, interval_starts_ptr, interval_lengths_ptr);
+  auto end = high_resolution_clock::now();
+  duration<float> t = end - start;
+  printf("InitVewTransformer cost time : %.4lf ms\n", t.count() * 1000);
+
+  if (access(engine_file.c_str(), F_OK) == 0) {
+    printf("Inference engine prepared.");
+  } else {
+    // onnx to engine
+    printf("Could not find %s, try making TensorRT engine from onnx", engine_file.c_str());
+    ExportEngine(onnx_file, engine_file);
+  }
+  InitEngine(engine_file);  // FIXME
+  MallocDeviceMemory();
+
+  if (use_adj) {
+    adj_frame_ptr.reset(new adjFrame(adj_num, trt_buffer_sizes[buffer_map["curr_bevfeat"]]));
+  }
+
+  cam_params_host = new float[N_img * cam_params_size];
+
+  CHECK_CUDA(cudaMemcpy(
+    trt_buffer_dev[buffer_map["ranks_bev"]], ranks_bev_ptr.get(), valid_feat_num * sizeof(int),
+    cudaMemcpyHostToDevice));
+  CHECK_CUDA(cudaMemcpy(
+    trt_buffer_dev[buffer_map["ranks_depth"]], ranks_depth_ptr.get(), valid_feat_num * sizeof(int),
+    cudaMemcpyHostToDevice));
+  CHECK_CUDA(cudaMemcpy(
+    trt_buffer_dev[buffer_map["ranks_feat"]], ranks_feat_ptr.get(), valid_feat_num * sizeof(int),
+    cudaMemcpyHostToDevice));
+  CHECK_CUDA(cudaMemcpy(
+    trt_buffer_dev[buffer_map["interval_starts"]], interval_starts_ptr.get(),
+    unique_bev_num * sizeof(int), cudaMemcpyHostToDevice));
+  CHECK_CUDA(cudaMemcpy(
+    trt_buffer_dev[buffer_map["interval_lengths"]], interval_lengths_ptr.get(),
+    unique_bev_num * sizeof(int), cudaMemcpyHostToDevice));
+
+  CHECK_CUDA(cudaMemcpy(
+    trt_buffer_dev[buffer_map["mean"]], mean.data(), 3 * sizeof(float), cudaMemcpyHostToDevice));
+  CHECK_CUDA(cudaMemcpy(
+    trt_buffer_dev[buffer_map["std"]], std.data(), 3 * sizeof(float), cudaMemcpyHostToDevice));
 }
 
-
-
-void BEVDet::InitCamParams(const std::vector<Eigen::Quaternion<float>> &curr_cams2ego_rot,
-                           const std::vector<Eigen::Translation3f> &curr_cams2ego_trans,
-                           const std::vector<Eigen::Matrix3f> &curr_cams_intrin){
-    for(int i = 0; i < N_img; i++){
-        cam_params_host[i * cam_params_size +  0] = curr_cams_intrin[i](0, 0);
-        cam_params_host[i * cam_params_size +  1] = curr_cams_intrin[i](1, 1);
-        cam_params_host[i * cam_params_size +  2] = curr_cams_intrin[i](0, 2);
-        cam_params_host[i * cam_params_size +  3] = curr_cams_intrin[i](1, 2);
-        cam_params_host[i * cam_params_size +  4] = post_rot(0, 0);
-        cam_params_host[i * cam_params_size +  5] = post_rot(0, 1);
-        cam_params_host[i * cam_params_size +  6] = post_trans.translation()(0);
-        cam_params_host[i * cam_params_size +  7] = post_rot(1, 0);
-        cam_params_host[i * cam_params_size +  8] = post_rot(1, 1);
-        cam_params_host[i * cam_params_size +  9] = post_trans.translation()(1);
-        cam_params_host[i * cam_params_size + 10] = 1.f; // bda 0 0
-        cam_params_host[i * cam_params_size + 11] = 0.f; // bda 0 1
-        cam_params_host[i * cam_params_size + 12] = 0.f; // bda 1 0
-        cam_params_host[i * cam_params_size + 13] = 1.f; // bda 1 1
-        cam_params_host[i * cam_params_size + 14] = 1.f; // bda 2 2 
-        cam_params_host[i * cam_params_size + 15] = curr_cams2ego_rot[i].matrix()(0, 0);
-        cam_params_host[i * cam_params_size + 16] = curr_cams2ego_rot[i].matrix()(0, 1);
-        cam_params_host[i * cam_params_size + 17] = curr_cams2ego_rot[i].matrix()(0, 2);
-        cam_params_host[i * cam_params_size + 18] = curr_cams2ego_trans[i].translation()(0);
-        cam_params_host[i * cam_params_size + 19] = curr_cams2ego_rot[i].matrix()(1, 0);
-        cam_params_host[i * cam_params_size + 20] = curr_cams2ego_rot[i].matrix()(1, 1);
-        cam_params_host[i * cam_params_size + 21] = curr_cams2ego_rot[i].matrix()(1, 2);
-        cam_params_host[i * cam_params_size + 22] = curr_cams2ego_trans[i].translation()(1);
-        cam_params_host[i * cam_params_size + 23] = curr_cams2ego_rot[i].matrix()(2, 0);
-        cam_params_host[i * cam_params_size + 24] = curr_cams2ego_rot[i].matrix()(2, 1);
-        cam_params_host[i * cam_params_size + 25] = curr_cams2ego_rot[i].matrix()(2, 2);
-        cam_params_host[i * cam_params_size + 26] = curr_cams2ego_trans[i].translation()(2);
-    }
-    CHECK_CUDA(cudaMemcpy(trt_buffer_dev[buffer_map["cam_params"]], cam_params_host, 
-                        trt_buffer_sizes[buffer_map["cam_params"]], cudaMemcpyHostToDevice));
-    // printf("trans : %d cam : %d\n", transform_size, cam_params_size);
+void BEVDet::InitCamParams(
+  const std::vector<Eigen::Quaternion<float>> & curr_cams2ego_rot,
+  const std::vector<Eigen::Translation3f> & curr_cams2ego_trans,
+  const std::vector<Eigen::Matrix3f> & curr_cams_intrin)
+{
+  for (int i = 0; i < N_img; i++) {
+    cam_params_host[i * cam_params_size + 0] = curr_cams_intrin[i](0, 0);
+    cam_params_host[i * cam_params_size + 1] = curr_cams_intrin[i](1, 1);
+    cam_params_host[i * cam_params_size + 2] = curr_cams_intrin[i](0, 2);
+    cam_params_host[i * cam_params_size + 3] = curr_cams_intrin[i](1, 2);
+    cam_params_host[i * cam_params_size + 4] = post_rot(0, 0);
+    cam_params_host[i * cam_params_size + 5] = post_rot(0, 1);
+    cam_params_host[i * cam_params_size + 6] = post_trans.translation()(0);
+    cam_params_host[i * cam_params_size + 7] = post_rot(1, 0);
+    cam_params_host[i * cam_params_size + 8] = post_rot(1, 1);
+    cam_params_host[i * cam_params_size + 9] = post_trans.translation()(1);
+    cam_params_host[i * cam_params_size + 10] = 1.f;  // bda 0 0
+    cam_params_host[i * cam_params_size + 11] = 0.f;  // bda 0 1
+    cam_params_host[i * cam_params_size + 12] = 0.f;  // bda 1 0
+    cam_params_host[i * cam_params_size + 13] = 1.f;  // bda 1 1
+    cam_params_host[i * cam_params_size + 14] = 1.f;  // bda 2 2
+    cam_params_host[i * cam_params_size + 15] = curr_cams2ego_rot[i].matrix()(0, 0);
+    cam_params_host[i * cam_params_size + 16] = curr_cams2ego_rot[i].matrix()(0, 1);
+    cam_params_host[i * cam_params_size + 17] = curr_cams2ego_rot[i].matrix()(0, 2);
+    cam_params_host[i * cam_params_size + 18] = curr_cams2ego_trans[i].translation()(0);
+    cam_params_host[i * cam_params_size + 19] = curr_cams2ego_rot[i].matrix()(1, 0);
+    cam_params_host[i * cam_params_size + 20] = curr_cams2ego_rot[i].matrix()(1, 1);
+    cam_params_host[i * cam_params_size + 21] = curr_cams2ego_rot[i].matrix()(1, 2);
+    cam_params_host[i * cam_params_size + 22] = curr_cams2ego_trans[i].translation()(1);
+    cam_params_host[i * cam_params_size + 23] = curr_cams2ego_rot[i].matrix()(2, 0);
+    cam_params_host[i * cam_params_size + 24] = curr_cams2ego_rot[i].matrix()(2, 1);
+    cam_params_host[i * cam_params_size + 25] = curr_cams2ego_rot[i].matrix()(2, 2);
+    cam_params_host[i * cam_params_size + 26] = curr_cams2ego_trans[i].translation()(2);
+  }
+  CHECK_CUDA(cudaMemcpy(
+    trt_buffer_dev[buffer_map["cam_params"]], cam_params_host,
+    trt_buffer_sizes[buffer_map["cam_params"]], cudaMemcpyHostToDevice));
+  // printf("trans : %d cam : %d\n", transform_size, cam_params_size);
 }
 
-
-void BEVDet::InitParams(const std::string &config_file){
-    mean = std::vector<float>(3);
-    std = std::vector<float>(3);
-
-    YAML::Node model_config = YAML::LoadFile(config_file);
-    N_img = model_config["data_config"]["Ncams"].as<int>();
-    src_img_h = model_config["data_config"]["src_size"][0].as<int>();
-    src_img_w = model_config["data_config"]["src_size"][1].as<int>();
-    input_img_h = model_config["data_config"]["input_size"][0].as<int>();
-    input_img_w = model_config["data_config"]["input_size"][1].as<int>();
-    crop_h = model_config["data_config"]["crop"][0].as<int>();
-    crop_w = model_config["data_config"]["crop"][1].as<int>();
-    mean[0] = model_config["mean"][0].as<float>();
-    mean[1] = model_config["mean"][1].as<float>();
-    mean[2] = model_config["mean"][2].as<float>();
-    std[0] = model_config["std"][0].as<float>();
-    std[1] = model_config["std"][1].as<float>();
-    std[2] = model_config["std"][2].as<float>();
-    down_sample = model_config["model"]["down_sample"].as<int>();
-    depth_start = model_config["grid_config"]["depth"][0].as<float>();
-    depth_end = model_config["grid_config"]["depth"][1].as<float>();
-    depth_step = model_config["grid_config"]["depth"][2].as<float>();
-    x_start = model_config["grid_config"]["x"][0].as<float>();
-    x_end = model_config["grid_config"]["x"][1].as<float>();
-    x_step = model_config["grid_config"]["x"][2].as<float>();
-    y_start = model_config["grid_config"]["y"][0].as<float>();
-    y_end = model_config["grid_config"]["y"][1].as<float>();
-    y_step = model_config["grid_config"]["y"][2].as<float>();
-    z_start = model_config["grid_config"]["z"][0].as<float>();
-    z_end = model_config["grid_config"]["z"][1].as<float>();
-    z_step = model_config["grid_config"]["z"][2].as<float>();
-    bevpool_channel = model_config["model"]["bevpool_channels"].as<int>();
-    nms_pre_maxnum = model_config["test_cfg"]["max_per_img"].as<int>();
-    nms_post_maxnum = model_config["test_cfg"]["post_max_size"].as<int>();
-    score_thresh = model_config["test_cfg"]["score_threshold"].as<float>();
-    nms_overlap_thresh = model_config["test_cfg"]["nms_thr"][0].as<float>();
-    use_depth = model_config["use_depth"].as<bool>();
-    use_adj = model_config["use_adj"].as<bool>();
-    transform_size = model_config["transform_size"].as<int>();
-    cam_params_size = model_config["cam_params_size"].as<int>();
-
-    std::vector<std::vector<float>> nms_factor_temp = model_config["test_cfg"]
-                            ["nms_rescale_factor"].as<std::vector<std::vector<float>>>();
-    nms_rescale_factor.clear();
-    for(auto task_factors : nms_factor_temp){
-        for(float factor : task_factors){
-            nms_rescale_factor.push_back(factor);
-        }
-    }
-    
-    std::vector<std::vector<std::string>> class_name_pre_task;
-    class_num = 0;
-    YAML::Node tasks = model_config["model"]["tasks"];
-    class_num_pre_task = std::vector<int>();
-    for(auto it : tasks){
-        int num = it["num_class"].as<int>();
-        class_num_pre_task.push_back(num);
-        class_num += num;
-        class_name_pre_task.push_back(it["class_names"].as<std::vector<std::string>>());
-    }
-
-    YAML::Node common_head_channel = model_config["model"]["common_head"]["channels"];
-    YAML::Node common_head_name = model_config["model"]["common_head"]["names"];
-    for(size_t i = 0; i< common_head_channel.size(); i++){
-        out_num_task_head[common_head_name[i].as<std::string>()] = 
-                                                        common_head_channel[i].as<int>();
-    }
-
-    resize_radio = (float)input_img_w / src_img_w;
-    feat_h = input_img_h / down_sample;
-    feat_w = input_img_w / down_sample;
-    depth_num = (depth_end - depth_start) / depth_step;
-    xgrid_num = (x_end - x_start) / x_step;
-    ygrid_num = (y_end - y_start) / y_step;
-    zgrid_num = (z_end - z_start) / z_step;
-    bev_h = ygrid_num;
-    bev_w = xgrid_num;
-
-
-    post_rot << resize_radio, 0, 0,
-                0, resize_radio, 0,
-                0, 0, 1;
-    post_trans.translation() << -crop_w, -crop_h, 0;
-
-    adj_num = 0;
-    if(use_adj){
-        adj_num = model_config["adj_num"].as<int>();
-    }
-
-
-    postprocess_ptr.reset(new PostprocessGPU(class_num, score_thresh, nms_overlap_thresh,
-                                            nms_pre_maxnum, nms_post_maxnum, down_sample,
-                                            bev_h, bev_w, x_step, y_step, x_start,
-                                            y_start, class_num_pre_task, nms_rescale_factor));
-
+void BEVDet::InitParams(const std::string & config_file)
+{
+  mean = std::vector<float>(3);
+  std = std::vector<float>(3);
+
+  YAML::Node model_config = YAML::LoadFile(config_file);
+  N_img = model_config["data_config"]["Ncams"].as<int>();
+  src_img_h = model_config["data_config"]["src_size"][0].as<int>();
+  src_img_w = model_config["data_config"]["src_size"][1].as<int>();
+  input_img_h = model_config["data_config"]["input_size"][0].as<int>();
+  input_img_w = model_config["data_config"]["input_size"][1].as<int>();
+  crop_h = model_config["data_config"]["crop"][0].as<int>();
+  crop_w = model_config["data_config"]["crop"][1].as<int>();
+  mean[0] = model_config["mean"][0].as<float>();
+  mean[1] = model_config["mean"][1].as<float>();
+  mean[2] = model_config["mean"][2].as<float>();
+  std[0] = model_config["std"][0].as<float>();
+  std[1] = model_config["std"][1].as<float>();
+  std[2] = model_config["std"][2].as<float>();
+  down_sample = model_config["model"]["down_sample"].as<int>();
+  depth_start = model_config["grid_config"]["depth"][0].as<float>();
+  depth_end = model_config["grid_config"]["depth"][1].as<float>();
+  depth_step = model_config["grid_config"]["depth"][2].as<float>();
+  x_start = model_config["grid_config"]["x"][0].as<float>();
+  x_end = model_config["grid_config"]["x"][1].as<float>();
+  x_step = model_config["grid_config"]["x"][2].as<float>();
+  y_start = model_config["grid_config"]["y"][0].as<float>();
+  y_end = model_config["grid_config"]["y"][1].as<float>();
+  y_step = model_config["grid_config"]["y"][2].as<float>();
+  z_start = model_config["grid_config"]["z"][0].as<float>();
+  z_end = model_config["grid_config"]["z"][1].as<float>();
+  z_step = model_config["grid_config"]["z"][2].as<float>();
+  bevpool_channel = model_config["model"]["bevpool_channels"].as<int>();
+  nms_pre_maxnum = model_config["test_cfg"]["max_per_img"].as<int>();
+  nms_post_maxnum = model_config["test_cfg"]["post_max_size"].as<int>();
+  score_thresh = model_config["test_cfg"]["score_threshold"].as<float>();
+  nms_overlap_thresh = model_config["test_cfg"]["nms_thr"][0].as<float>();
+  use_depth = model_config["use_depth"].as<bool>();
+  use_adj = model_config["use_adj"].as<bool>();
+  transform_size = model_config["transform_size"].as<int>();
+  cam_params_size = model_config["cam_params_size"].as<int>();
+
+  std::vector<std::vector<float>> nms_factor_temp =
+    model_config["test_cfg"]["nms_rescale_factor"].as<std::vector<std::vector<float>>>();
+  nms_rescale_factor.clear();
+  for (auto task_factors : nms_factor_temp) {
+    for (float factor : task_factors) {
+      nms_rescale_factor.push_back(factor);
+    }
+  }
+
+  std::vector<std::vector<std::string>> class_name_pre_task;
+  class_num = 0;
+  YAML::Node tasks = model_config["model"]["tasks"];
+  class_num_pre_task = std::vector<int>();
+  for (auto it : tasks) {
+    int num = it["num_class"].as<int>();
+    class_num_pre_task.push_back(num);
+    class_num += num;
+    class_name_pre_task.push_back(it["class_names"].as<std::vector<std::string>>());
+  }
+
+  YAML::Node common_head_channel = model_config["model"]["common_head"]["channels"];
+  YAML::Node common_head_name = model_config["model"]["common_head"]["names"];
+  for (size_t i = 0; i < common_head_channel.size(); i++) {
+    out_num_task_head[common_head_name[i].as<std::string>()] = common_head_channel[i].as<int>();
+  }
+
+  resize_radio = (float)input_img_w / src_img_w;
+  feat_h = input_img_h / down_sample;
+  feat_w = input_img_w / down_sample;
+  depth_num = (depth_end - depth_start) / depth_step;
+  xgrid_num = (x_end - x_start) / x_step;
+  ygrid_num = (y_end - y_start) / y_step;
+  zgrid_num = (z_end - z_start) / z_step;
+  bev_h = ygrid_num;
+  bev_w = xgrid_num;
+
+  post_rot << resize_radio, 0, 0, 0, resize_radio, 0, 0, 0, 1;
+  post_trans.translation() << -crop_w, -crop_h, 0;
+
+  adj_num = 0;
+  if (use_adj) {
+    adj_num = model_config["adj_num"].as<int>();
+  }
+
+  postprocess_ptr.reset(new PostprocessGPU(
+    class_num, score_thresh, nms_overlap_thresh, nms_pre_maxnum, nms_post_maxnum, down_sample,
+    bev_h, bev_w, x_step, y_step, x_start, y_start, class_num_pre_task, nms_rescale_factor));
 }
 
-void BEVDet::MallocDeviceMemory(){
-
-    trt_buffer_sizes.resize(trt_engine->getNbBindings());
-    trt_buffer_dev = (void**)new void*[trt_engine->getNbBindings()];
-    for(int i = 0; i < trt_engine->getNbBindings(); i++){
-        nvinfer1::Dims32 dim = trt_context->getBindingDimensions(i);
-        size_t size = 1;
-        for(int j = 0; j < dim.nbDims; j++){
-            size *= dim.d[j];
-        }
-        size *= dataTypeToSize(trt_engine->getBindingDataType(i));
-        trt_buffer_sizes[i] = size;
-        CHECK_CUDA(cudaMalloc(&trt_buffer_dev[i], size));
-    }
-
-    std::cout << "img num binding : " << trt_engine->getNbBindings() << std::endl;
-
-    post_buffer = (void**)new void*[class_num_pre_task.size() * 6];
-    for(size_t i = 0; i < class_num_pre_task.size(); i++){
-        post_buffer[i * 6 + 0] = trt_buffer_dev[buffer_map["reg_" + std::to_string(i)]];
-        post_buffer[i * 6 + 1] = trt_buffer_dev[buffer_map["height_" + std::to_string(i)]];
-        post_buffer[i * 6 + 2] = trt_buffer_dev[buffer_map["dim_" + std::to_string(i)]];
-        post_buffer[i * 6 + 3] = trt_buffer_dev[buffer_map["rot_" + std::to_string(i)]];
-        post_buffer[i * 6 + 4] = trt_buffer_dev[buffer_map["vel_" + std::to_string(i)]];
-        post_buffer[i * 6 + 5] = trt_buffer_dev[buffer_map["heatmap_" + std::to_string(i)]];
-    }
-
-    return;
+void BEVDet::MallocDeviceMemory()
+{
+  trt_buffer_sizes.resize(trt_engine->getNbBindings());
+  trt_buffer_dev = (void **)new void *[trt_engine->getNbBindings()];
+  for (int i = 0; i < trt_engine->getNbBindings(); i++) {
+    nvinfer1::Dims32 dim = trt_context->getBindingDimensions(i);
+    size_t size = 1;
+    for (int j = 0; j < dim.nbDims; j++) {
+      size *= dim.d[j];
+    }
+    size *= dataTypeToSize(trt_engine->getBindingDataType(i));
+    trt_buffer_sizes[i] = size;
+    CHECK_CUDA(cudaMalloc(&trt_buffer_dev[i], size));
+  }
+
+  std::cout << "img num binding : " << trt_engine->getNbBindings() << std::endl;
+
+  post_buffer = (void **)new void *[class_num_pre_task.size() * 6];
+  for (size_t i = 0; i < class_num_pre_task.size(); i++) {
+    post_buffer[i * 6 + 0] = trt_buffer_dev[buffer_map["reg_" + std::to_string(i)]];
+    post_buffer[i * 6 + 1] = trt_buffer_dev[buffer_map["height_" + std::to_string(i)]];
+    post_buffer[i * 6 + 2] = trt_buffer_dev[buffer_map["dim_" + std::to_string(i)]];
+    post_buffer[i * 6 + 3] = trt_buffer_dev[buffer_map["rot_" + std::to_string(i)]];
+    post_buffer[i * 6 + 4] = trt_buffer_dev[buffer_map["vel_" + std::to_string(i)]];
+    post_buffer[i * 6 + 5] = trt_buffer_dev[buffer_map["heatmap_" + std::to_string(i)]];
+  }
+
+  return;
 }
 
-
-void BEVDet::InitViewTransformer(std::shared_ptr<int> &ranks_bev_ptr, 
-                                 std::shared_ptr<int> &ranks_depth_ptr, 
-                                 std::shared_ptr<int> &ranks_feat_ptr, 
-                                 std::shared_ptr<int> &interval_starts_ptr, 
-                                 std::shared_ptr<int> &interval_lengths_ptr){
-
-    int num_points = N_img * depth_num * feat_h * feat_w;
-    Eigen::Vector3f* frustum = new Eigen::Vector3f[num_points];
-
-    for(int i = 0; i < N_img; i++){
-        for(int d_ = 0; d_ < depth_num; d_++){
-            for(int h_ = 0; h_ < feat_h; h_++){
-                for(int w_ = 0; w_ < feat_w; w_++){
-                    int offset = i * depth_num * feat_h * feat_w + d_ * feat_h * feat_w
-                                                                 + h_ * feat_w + w_;
-                    (frustum + offset)->x() = (float)w_ * (input_img_w - 1) / (feat_w - 1);
-                    (frustum + offset)->y() = (float)h_ * (input_img_h - 1) / (feat_h - 1);
-                    (frustum + offset)->z() = (float)d_ * depth_step + depth_start;
-
-                    // eliminate post transformation
-                    *(frustum + offset) -= post_trans.translation();
-                    *(frustum + offset) = post_rot.inverse() * *(frustum + offset);
-                    // 
-                    (frustum + offset)->x() *= (frustum + offset)->z();
-                    (frustum + offset)->y() *= (frustum + offset)->z();
-                    // img to ego -> rot -> trans
-                    *(frustum + offset) = cams2ego_rot[i] * cams_intrin[i].inverse()
-                                    * *(frustum + offset) + cams2ego_trans[i].translation();
-
-                    // voxelization
-                    *(frustum + offset) -= Eigen::Vector3f(x_start, y_start, z_start);
-                    (frustum + offset)->x() = (int)((frustum + offset)->x() / x_step);
-                    (frustum + offset)->y() = (int)((frustum + offset)->y() / y_step);
-                    (frustum + offset)->z() = (int)((frustum + offset)->z() / z_step);
-                }
-            }
-        }
-    }
-
-    int* _ranks_depth = new int[num_points];
-    int* _ranks_feat = new int[num_points];
-
-    for(int i = 0; i < num_points; i++){
-        _ranks_depth[i] = i;
-    }
-    for(int i = 0; i < N_img; i++){
-        for(int d_ = 0; d_ < depth_num; d_++){
-            for(int u = 0; u < feat_h * feat_w; u++){
-                int offset = i * (depth_num * feat_h * feat_w) + d_ * (feat_h * feat_w) + u;
-                _ranks_feat[offset] = i * feat_h * feat_w + u;
-            }
-        }
-    }
-
-    std::vector<int> kept;
-    for(int i = 0; i < num_points; i++){
-        if((int)(frustum + i)->x() >= 0 && (int)(frustum + i)->x() < xgrid_num &&
-           (int)(frustum + i)->y() >= 0 && (int)(frustum + i)->y() < ygrid_num &&
-           (int)(frustum + i)->z() >= 0 && (int)(frustum + i)->z() < zgrid_num){
-            kept.push_back(i);
+void BEVDet::InitViewTransformer(
+  std::shared_ptr<int> & ranks_bev_ptr, std::shared_ptr<int> & ranks_depth_ptr,
+  std::shared_ptr<int> & ranks_feat_ptr, std::shared_ptr<int> & interval_starts_ptr,
+  std::shared_ptr<int> & interval_lengths_ptr)
+{
+  int num_points = N_img * depth_num * feat_h * feat_w;
+  Eigen::Vector3f * frustum = new Eigen::Vector3f[num_points];
+
+  for (int i = 0; i < N_img; i++) {
+    for (int d_ = 0; d_ < depth_num; d_++) {
+      for (int h_ = 0; h_ < feat_h; h_++) {
+        for (int w_ = 0; w_ < feat_w; w_++) {
+          int offset = i * depth_num * feat_h * feat_w + d_ * feat_h * feat_w + h_ * feat_w + w_;
+          (frustum + offset)->x() = (float)w_ * (input_img_w - 1) / (feat_w - 1);
+          (frustum + offset)->y() = (float)h_ * (input_img_h - 1) / (feat_h - 1);
+          (frustum + offset)->z() = (float)d_ * depth_step + depth_start;
+
+          // eliminate post transformation
+          *(frustum + offset) -= post_trans.translation();
+          *(frustum + offset) = post_rot.inverse() * *(frustum + offset);
+          //
+          (frustum + offset)->x() *= (frustum + offset)->z();
+          (frustum + offset)->y() *= (frustum + offset)->z();
+          // img to ego -> rot -> trans
+          *(frustum + offset) = cams2ego_rot[i] * cams_intrin[i].inverse() * *(frustum + offset) +
+                                cams2ego_trans[i].translation();
+
+          // voxelization
+          *(frustum + offset) -= Eigen::Vector3f(x_start, y_start, z_start);
+          (frustum + offset)->x() = (int)((frustum + offset)->x() / x_step);
+          (frustum + offset)->y() = (int)((frustum + offset)->y() / y_step);
+          (frustum + offset)->z() = (int)((frustum + offset)->z() / z_step);
         }
-    }
-
-    valid_feat_num = kept.size();
-    int* ranks_depth_host = new int[valid_feat_num];
-    int* ranks_feat_host = new int[valid_feat_num];
-    int* ranks_bev_host = new int[valid_feat_num];
-    int* order = new int[valid_feat_num];
-
-    for(int i = 0; i < valid_feat_num; i++){
-        Eigen::Vector3f &p = frustum[kept[i]];
-        ranks_bev_host[i] = (int)p.z() * xgrid_num * ygrid_num + 
-                            (int)p.y() * xgrid_num + (int)p.x();
-        order[i] = i;
-    }
-
-    thrust::sort_by_key(ranks_bev_host, ranks_bev_host + valid_feat_num, order);
-    for(int i = 0; i < valid_feat_num; i++){
-        ranks_depth_host[i] = _ranks_depth[kept[order[i]]];
-        ranks_feat_host[i] = _ranks_feat[kept[order[i]]];
-    }
-
-    delete[] _ranks_depth;
-    delete[] _ranks_feat;
-    delete[] frustum;
-    delete[] order;
-
-    std::vector<int> interval_starts_host;
-    std::vector<int> interval_lengths_host;
-
-    interval_starts_host.push_back(0);
-    int len = 1;
-    for(int i = 1; i < valid_feat_num; i++){
-        if(ranks_bev_host[i] != ranks_bev_host[i - 1]){
-            interval_starts_host.push_back(i);
-            interval_lengths_host.push_back(len);
-            len=1;
-        }
-        else{
-            len++;
-        }
-    }
-    
-    interval_lengths_host.push_back(len);
-    unique_bev_num = interval_lengths_host.size();
-
-
-    int* interval_starts_host_ptr = new int[interval_starts_host.size()];
-    int* interval_lengths_host_ptr = new int[interval_lengths_host.size()];
-
-    memcpy(interval_starts_host_ptr, interval_starts_host.data(), 
-                                            interval_starts_host.size() * sizeof(int));
-    memcpy(interval_lengths_host_ptr, interval_lengths_host.data(), 
-                                            interval_lengths_host.size() * sizeof(int));
-
-    ranks_bev_ptr.reset(ranks_bev_host);
-    ranks_depth_ptr.reset(ranks_depth_host);
-    ranks_feat_ptr.reset(ranks_feat_host);
-    interval_starts_ptr.reset(interval_starts_host_ptr);
-    interval_lengths_ptr.reset(interval_lengths_host_ptr);
-
-    printf("valid_feat_num: %d \n", valid_feat_num);
-    printf("unique_bev_num: %d \n", unique_bev_num);
+      }
+    }
+  }
+
+  int * _ranks_depth = new int[num_points];
+  int * _ranks_feat = new int[num_points];
+
+  for (int i = 0; i < num_points; i++) {
+    _ranks_depth[i] = i;
+  }
+  for (int i = 0; i < N_img; i++) {
+    for (int d_ = 0; d_ < depth_num; d_++) {
+      for (int u = 0; u < feat_h * feat_w; u++) {
+        int offset = i * (depth_num * feat_h * feat_w) + d_ * (feat_h * feat_w) + u;
+        _ranks_feat[offset] = i * feat_h * feat_w + u;
+      }
+    }
+  }
+
+  std::vector<int> kept;
+  for (int i = 0; i < num_points; i++) {
+    if (
+      (int)(frustum + i)->x() >= 0 && (int)(frustum + i)->x() < xgrid_num &&
+      (int)(frustum + i)->y() >= 0 && (int)(frustum + i)->y() < ygrid_num &&
+      (int)(frustum + i)->z() >= 0 && (int)(frustum + i)->z() < zgrid_num) {
+      kept.push_back(i);
+    }
+  }
+
+  valid_feat_num = kept.size();
+  int * ranks_depth_host = new int[valid_feat_num];
+  int * ranks_feat_host = new int[valid_feat_num];
+  int * ranks_bev_host = new int[valid_feat_num];
+  int * order = new int[valid_feat_num];
+
+  for (int i = 0; i < valid_feat_num; i++) {
+    Eigen::Vector3f & p = frustum[kept[i]];
+    ranks_bev_host[i] = (int)p.z() * xgrid_num * ygrid_num + (int)p.y() * xgrid_num + (int)p.x();
+    order[i] = i;
+  }
+
+  thrust::sort_by_key(ranks_bev_host, ranks_bev_host + valid_feat_num, order);
+  for (int i = 0; i < valid_feat_num; i++) {
+    ranks_depth_host[i] = _ranks_depth[kept[order[i]]];
+    ranks_feat_host[i] = _ranks_feat[kept[order[i]]];
+  }
+
+  delete[] _ranks_depth;
+  delete[] _ranks_feat;
+  delete[] frustum;
+  delete[] order;
+
+  std::vector<int> interval_starts_host;
+  std::vector<int> interval_lengths_host;
+
+  interval_starts_host.push_back(0);
+  int len = 1;
+  for (int i = 1; i < valid_feat_num; i++) {
+    if (ranks_bev_host[i] != ranks_bev_host[i - 1]) {
+      interval_starts_host.push_back(i);
+      interval_lengths_host.push_back(len);
+      len = 1;
+    } else {
+      len++;
+    }
+  }
+
+  interval_lengths_host.push_back(len);
+  unique_bev_num = interval_lengths_host.size();
+
+  int * interval_starts_host_ptr = new int[interval_starts_host.size()];
+  int * interval_lengths_host_ptr = new int[interval_lengths_host.size()];
+
+  memcpy(
+    interval_starts_host_ptr, interval_starts_host.data(),
+    interval_starts_host.size() * sizeof(int));
+  memcpy(
+    interval_lengths_host_ptr, interval_lengths_host.data(),
+    interval_lengths_host.size() * sizeof(int));
+
+  ranks_bev_ptr.reset(ranks_bev_host);
+  ranks_depth_ptr.reset(ranks_depth_host);
+  ranks_feat_ptr.reset(ranks_feat_host);
+  interval_starts_ptr.reset(interval_starts_host_ptr);
+  interval_lengths_ptr.reset(interval_lengths_host_ptr);
+
+  printf("valid_feat_num: %d \n", valid_feat_num);
+  printf("unique_bev_num: %d \n", unique_bev_num);
 }
 
-
-void print_dim(nvinfer1::Dims dim){
-    for(auto i = 0; i < dim.nbDims; i++){
-        printf("%d%c", dim.d[i], i == dim.nbDims - 1 ? '\n' : ' ');
-    }
+void print_dim(nvinfer1::Dims dim)
+{
+  for (auto i = 0; i < dim.nbDims; i++) {
+    printf("%d%c", dim.d[i], i == dim.nbDims - 1 ? '\n' : ' ');
+  }
 }
 
-int BEVDet::InitEngine(const std::string &engine_file){
-
-    if(DeserializeTRTEngine(engine_file, &trt_engine)){
-        return EXIT_FAILURE;
-    }
-
-    if(trt_engine == nullptr){
-        std::cerr << "Failed to deserialize engine file!" << std::endl;
-        return EXIT_FAILURE;
-    }
-    trt_context = trt_engine->createExecutionContext();
-
-    if (trt_context == nullptr) {
-        std::cerr << "Failed to create TensorRT Execution Context!" << std::endl;
-        return EXIT_FAILURE;
-    }
-
-    // set bindings
-    std::vector<nvinfer1::Dims32> shapes{
-        {4, {N_img, 3, src_img_h, src_img_w  / 4}},
-        {1, {3}},
-        {1, {3}},
-        {3, {1, N_img, cam_params_size}},
-        {1, {valid_feat_num}},
-        {1, {valid_feat_num}},
-        {1, {valid_feat_num}},
-        {1, {unique_bev_num}},
-        {1, {unique_bev_num}},
-        {5, {1, adj_num, bevpool_channel, bev_h, bev_w}},
-        {3, {1, adj_num, transform_size}},
-        {2, {1, 1}}
-    };
-
-
-
-    for(size_t i = 0; i < shapes.size(); i++){
-         trt_context->setBindingDimensions(i, shapes[i]);
-    }
-
-    buffer_map.clear();
-    for(auto i = 0; i < trt_engine->getNbBindings(); i++){
-        auto dim = trt_context->getBindingDimensions(i);
-        auto name = trt_engine->getBindingName(i);
-        buffer_map[name] = i;
-        std::cout << name << " : ";
-        print_dim(dim);
-    }
-    std::cout << std::endl;
-
-    return EXIT_SUCCESS;
+int BEVDet::InitEngine(const std::string & engine_file)
+{
+  if (DeserializeTRTEngine(engine_file, &trt_engine)) {
+    return EXIT_FAILURE;
+  }
+
+  if (trt_engine == nullptr) {
+    std::cerr << "Failed to deserialize engine file!" << std::endl;
+    return EXIT_FAILURE;
+  }
+  trt_context = trt_engine->createExecutionContext();
+
+  if (trt_context == nullptr) {
+    std::cerr << "Failed to create TensorRT Execution Context!" << std::endl;
+    return EXIT_FAILURE;
+  }
+
+  // set bindings
+  std::vector<nvinfer1::Dims32> shapes{
+    {4, {N_img, 3, src_img_h, src_img_w / 4}},
+    {1, {3}},
+    {1, {3}},
+    {3, {1, N_img, cam_params_size}},
+    {1, {valid_feat_num}},
+    {1, {valid_feat_num}},
+    {1, {valid_feat_num}},
+    {1, {unique_bev_num}},
+    {1, {unique_bev_num}},
+    {5, {1, adj_num, bevpool_channel, bev_h, bev_w}},
+    {3, {1, adj_num, transform_size}},
+    {2, {1, 1}}};
+
+  for (size_t i = 0; i < shapes.size(); i++) {
+    trt_context->setBindingDimensions(i, shapes[i]);
+  }
+
+  buffer_map.clear();
+  for (auto i = 0; i < trt_engine->getNbBindings(); i++) {
+    auto dim = trt_context->getBindingDimensions(i);
+    auto name = trt_engine->getBindingName(i);
+    buffer_map[name] = i;
+    std::cout << name << " : ";
+    print_dim(dim);
+  }
+  std::cout << std::endl;
+
+  return EXIT_SUCCESS;
 }
-void save_tensor(size_t size, const void * ptr, const std::string& file){
-    float* tensor = new float[size];
-    CHECK_CUDA(cudaMemcpy(tensor, ptr, size * sizeof(float), cudaMemcpyDeviceToHost));
-    std::ofstream out(file, std::ios::out | std::ios::binary);
-    out.write((char*)tensor, size * sizeof(float));
-    out.close();
-    delete[] tensor;
+void save_tensor(size_t size, const void * ptr, const std::string & file)
+{
+  float * tensor = new float[size];
+  CHECK_CUDA(cudaMemcpy(tensor, ptr, size * sizeof(float), cudaMemcpyDeviceToHost));
+  std::ofstream out(file, std::ios::out | std::ios::binary);
+  out.write((char *)tensor, size * sizeof(float));
+  out.close();
+  delete[] tensor;
 }
 
-
-int BEVDet::DeserializeTRTEngine(const std::string &engine_file, 
-                                 nvinfer1::ICudaEngine **engine_ptr){
-    int verbosity = static_cast<int>(nvinfer1::ILogger::Severity::kWARNING);
-    std::stringstream engine_stream;
-    engine_stream.seekg(0, engine_stream.beg);
-
-    std::ifstream file(engine_file);
-    engine_stream << file.rdbuf();
-    file.close();
-
-    nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(g_logger);
-    if (runtime == nullptr) {
-        std::string msg("Failed to build runtime parser!");
-        g_logger.log(nvinfer1::ILogger::Severity::kERROR, msg.c_str());
-        return EXIT_FAILURE;
-    }
-    engine_stream.seekg(0, std::ios::end);
-    const int engine_size = engine_stream.tellg();
-
-    engine_stream.seekg(0, std::ios::beg); 
-    void* engine_str = malloc(engine_size);
-    engine_stream.read((char*)engine_str, engine_size);
-    
-    nvinfer1::ICudaEngine *engine = runtime->deserializeCudaEngine(engine_str, engine_size, NULL);
-    if (engine == nullptr) {
-        std::string msg("Failed to build engine parser!");
-        g_logger.log(nvinfer1::ILogger::Severity::kERROR, msg.c_str());
-        return EXIT_FAILURE;
-    }
-    *engine_ptr = engine;
-    for (int bi = 0; bi < engine->getNbBindings(); bi++) {
-        if (engine->bindingIsInput(bi) == true){
-            printf("Binding %d (%s): Input. \n", bi, engine->getBindingName(bi));
-        }
-        else{
-            printf("Binding %d (%s): Output. \n", bi, engine->getBindingName(bi));
-        }
-    }
-    return EXIT_SUCCESS;
+int BEVDet::DeserializeTRTEngine(
+  const std::string & engine_file, nvinfer1::ICudaEngine ** engine_ptr)
+{
+  int verbosity = static_cast<int>(nvinfer1::ILogger::Severity::kWARNING);
+  std::stringstream engine_stream;
+  engine_stream.seekg(0, engine_stream.beg);
+
+  std::ifstream file(engine_file);
+  engine_stream << file.rdbuf();
+  file.close();
+
+  nvinfer1::IRuntime * runtime = nvinfer1::createInferRuntime(g_logger);
+  if (runtime == nullptr) {
+    std::string msg("Failed to build runtime parser!");
+    g_logger.log(nvinfer1::ILogger::Severity::kERROR, msg.c_str());
+    return EXIT_FAILURE;
+  }
+  engine_stream.seekg(0, std::ios::end);
+  const int engine_size = engine_stream.tellg();
+
+  engine_stream.seekg(0, std::ios::beg);
+  void * engine_str = malloc(engine_size);
+  engine_stream.read((char *)engine_str, engine_size);
+
+  nvinfer1::ICudaEngine * engine = runtime->deserializeCudaEngine(engine_str, engine_size, NULL);
+  if (engine == nullptr) {
+    std::string msg("Failed to build engine parser!");
+    g_logger.log(nvinfer1::ILogger::Severity::kERROR, msg.c_str());
+    return EXIT_FAILURE;
+  }
+  *engine_ptr = engine;
+  for (int bi = 0; bi < engine->getNbBindings(); bi++) {
+    if (engine->bindingIsInput(bi) == true) {
+      printf("Binding %d (%s): Input. \n", bi, engine->getBindingName(bi));
+    } else {
+      printf("Binding %d (%s): Output. \n", bi, engine->getBindingName(bi));
+    }
+  }
+  return EXIT_SUCCESS;
 }
 
-
-void BEVDet::GetAdjBEVFeature(const std::string &curr_scene_token, 
-                         const Eigen::Quaternion<float> &ego2global_rot,
-                         const Eigen::Translation3f &ego2global_trans) {
-
-    int flag = 1;
-    if(adj_frame_ptr->lastScenesToken() != curr_scene_token){
-        adj_frame_ptr->reset();
-        flag = 0;
-    }
-
-    // idx越小, adj_bevfeat越新 
-    for(int i = 0; i < adj_num; i++){
-        const void* adj_buffer = adj_frame_ptr->getFrameBuffer(i);
-
-        size_t buf_size = trt_buffer_sizes[buffer_map["adj_feats"]] / adj_num;
-
-        CHECK_CUDA(cudaMemcpy((char*)trt_buffer_dev[buffer_map["adj_feats"]] + i * buf_size,
-                                adj_buffer, buf_size, cudaMemcpyDeviceToDevice));
-
-        Eigen::Quaternion<float> adj_ego2global_rot;
-        Eigen::Translation3f adj_ego2global_trans;
-        adj_frame_ptr->getEgo2Global(i, adj_ego2global_rot, adj_ego2global_trans);
-        
-        GetCurr2AdjTransform(ego2global_rot, 
-                            adj_ego2global_rot, 
-                            ego2global_trans,
-                            adj_ego2global_trans, 
-                            (float *)trt_buffer_dev[buffer_map["transforms"]] + i * transform_size);
-    }
-    CHECK_CUDA(cudaMemcpy(trt_buffer_dev[buffer_map["flag"]], &flag, 
-                        trt_buffer_sizes[buffer_map["flag"]], cudaMemcpyHostToDevice));
-
+void BEVDet::GetAdjBEVFeature(
+  const std::string & curr_scene_token, const Eigen::Quaternion<float> & ego2global_rot,
+  const Eigen::Translation3f & ego2global_trans)
+{
+  int flag = 1;
+  if (adj_frame_ptr->lastScenesToken() != curr_scene_token) {
+    adj_frame_ptr->reset();
+    flag = 0;
+  }
+
+  // idx越小, adj_bevfeat越新
+  for (int i = 0; i < adj_num; i++) {
+    const void * adj_buffer = adj_frame_ptr->getFrameBuffer(i);
+
+    size_t buf_size = trt_buffer_sizes[buffer_map["adj_feats"]] / adj_num;
+
+    CHECK_CUDA(cudaMemcpy(
+      (char *)trt_buffer_dev[buffer_map["adj_feats"]] + i * buf_size, adj_buffer, buf_size,
+      cudaMemcpyDeviceToDevice));
+
+    Eigen::Quaternion<float> adj_ego2global_rot;
+    Eigen::Translation3f adj_ego2global_trans;
+    adj_frame_ptr->getEgo2Global(i, adj_ego2global_rot, adj_ego2global_trans);
+
+    GetCurr2AdjTransform(
+      ego2global_rot, adj_ego2global_rot, ego2global_trans, adj_ego2global_trans,
+      (float *)trt_buffer_dev[buffer_map["transforms"]] + i * transform_size);
+  }
+  CHECK_CUDA(cudaMemcpy(
+    trt_buffer_dev[buffer_map["flag"]], &flag, trt_buffer_sizes[buffer_map["flag"]],
+    cudaMemcpyHostToDevice));
 }
 
-
-void BEVDet::GetCurr2AdjTransform(const Eigen::Quaternion<float> &curr_ego2global_rot,
-                                const Eigen::Quaternion<float> &adj_ego2global_rot,
-                                const Eigen::Translation3f &curr_ego2global_trans,
-                                const Eigen::Translation3f &adj_ego2global_trans,
-                                float* transform_dev){
-    Eigen::Matrix4f curr_e2g_transform;
-    Eigen::Matrix4f adj_e2g_transform;
-
-    for(int i = 0; i < 3; i++){
-        for(int j = 0; j < 3; j++){
-            curr_e2g_transform(i, j) = curr_ego2global_rot.matrix()(i, j);
-            adj_e2g_transform(i, j) = adj_ego2global_rot.matrix()(i, j);
-        }
-    }
-    for(int i = 0; i < 3; i++){
-        curr_e2g_transform(i, 3) = curr_ego2global_trans.vector()(i);
-        adj_e2g_transform(i, 3) = adj_ego2global_trans.vector()(i);
-
-        curr_e2g_transform(3, i) = 0.f;
-        adj_e2g_transform(3, i) = 0.f;
-    }
-    curr_e2g_transform(3, 3) = 1.f;
-    adj_e2g_transform(3, 3) = 1.f;
-
-    Eigen::Matrix4f currEgo2adjEgo = adj_e2g_transform.inverse() * curr_e2g_transform;
-    Eigen::Matrix3f currEgo2adjEgo_2d;
-    for(int i = 0; i < 2; i++){
-        for(int j = 0; j < 2; j++){
-            currEgo2adjEgo_2d(i, j) = currEgo2adjEgo(i, j);
-        }
-    }
-    currEgo2adjEgo_2d(2, 0) = 0.f;
-    currEgo2adjEgo_2d(2, 1) = 0.f;
-    currEgo2adjEgo_2d(2, 2) = 1.f;
-    currEgo2adjEgo_2d(0, 2) = currEgo2adjEgo(0, 3);
-    currEgo2adjEgo_2d(1, 2) = currEgo2adjEgo(1, 3);
-
-    Eigen::Matrix3f gridbev2egobev;
-    gridbev2egobev(0, 0) = x_step;
-    gridbev2egobev(1, 1) = y_step;
-    gridbev2egobev(0, 2) = x_start;
-    gridbev2egobev(1, 2) = y_start;
-    gridbev2egobev(2, 2) = 1.f;
-
-    gridbev2egobev(0, 1) = 0.f;
-    gridbev2egobev(1, 0) = 0.f;
-    gridbev2egobev(2, 0) = 0.f;
-    gridbev2egobev(2, 1) = 0.f;
-
-    Eigen::Matrix3f currgrid2adjgrid = gridbev2egobev.inverse() * currEgo2adjEgo_2d * gridbev2egobev;
-
-    CHECK_CUDA(cudaMemcpy(transform_dev, Eigen::Matrix3f(currgrid2adjgrid.transpose()).data(),
-                                    transform_size * sizeof(float), cudaMemcpyHostToDevice));
-
+void BEVDet::GetCurr2AdjTransform(
+  const Eigen::Quaternion<float> & curr_ego2global_rot,
+  const Eigen::Quaternion<float> & adj_ego2global_rot,
+  const Eigen::Translation3f & curr_ego2global_trans,
+  const Eigen::Translation3f & adj_ego2global_trans, float * transform_dev)
+{
+  Eigen::Matrix4f curr_e2g_transform;
+  Eigen::Matrix4f adj_e2g_transform;
+
+  for (int i = 0; i < 3; i++) {
+    for (int j = 0; j < 3; j++) {
+      curr_e2g_transform(i, j) = curr_ego2global_rot.matrix()(i, j);
+      adj_e2g_transform(i, j) = adj_ego2global_rot.matrix()(i, j);
+    }
+  }
+  for (int i = 0; i < 3; i++) {
+    curr_e2g_transform(i, 3) = curr_ego2global_trans.vector()(i);
+    adj_e2g_transform(i, 3) = adj_ego2global_trans.vector()(i);
+
+    curr_e2g_transform(3, i) = 0.f;
+    adj_e2g_transform(3, i) = 0.f;
+  }
+  curr_e2g_transform(3, 3) = 1.f;
+  adj_e2g_transform(3, 3) = 1.f;
+
+  Eigen::Matrix4f currEgo2adjEgo = adj_e2g_transform.inverse() * curr_e2g_transform;
+  Eigen::Matrix3f currEgo2adjEgo_2d;
+  for (int i = 0; i < 2; i++) {
+    for (int j = 0; j < 2; j++) {
+      currEgo2adjEgo_2d(i, j) = currEgo2adjEgo(i, j);
+    }
+  }
+  currEgo2adjEgo_2d(2, 0) = 0.f;
+  currEgo2adjEgo_2d(2, 1) = 0.f;
+  currEgo2adjEgo_2d(2, 2) = 1.f;
+  currEgo2adjEgo_2d(0, 2) = currEgo2adjEgo(0, 3);
+  currEgo2adjEgo_2d(1, 2) = currEgo2adjEgo(1, 3);
+
+  Eigen::Matrix3f gridbev2egobev;
+  gridbev2egobev(0, 0) = x_step;
+  gridbev2egobev(1, 1) = y_step;
+  gridbev2egobev(0, 2) = x_start;
+  gridbev2egobev(1, 2) = y_start;
+  gridbev2egobev(2, 2) = 1.f;
+
+  gridbev2egobev(0, 1) = 0.f;
+  gridbev2egobev(1, 0) = 0.f;
+  gridbev2egobev(2, 0) = 0.f;
+  gridbev2egobev(2, 1) = 0.f;
+
+  Eigen::Matrix3f currgrid2adjgrid = gridbev2egobev.inverse() * currEgo2adjEgo_2d * gridbev2egobev;
+
+  CHECK_CUDA(cudaMemcpy(
+    transform_dev, Eigen::Matrix3f(currgrid2adjgrid.transpose()).data(),
+    transform_size * sizeof(float), cudaMemcpyHostToDevice));
 }
 
+int BEVDet::DoInfer(
+  const camsData & cam_data, std::vector<Box> & out_detections, float & cost_time, int idx)
+{
+  printf("-------------------%d-------------------\n", idx + 1);
 
+  printf(
+    "scenes_token : %s, timestamp : %lld\n", cam_data.param.scene_token.data(),
+    cam_data.param.timestamp);
 
-int BEVDet::DoInfer(const camsData& cam_data, std::vector<Box> &out_detections, float &cost_time,
-                                                                                    int idx){
-    
-    printf("-------------------%d-------------------\n", idx + 1);
+  auto start = high_resolution_clock::now();
+  CHECK_CUDA(cudaMemcpy(
+    trt_buffer_dev[buffer_map["images"]], cam_data.imgs_dev, trt_buffer_sizes[buffer_map["images"]],
+    cudaMemcpyDeviceToDevice));
 
-    printf("scenes_token : %s, timestamp : %lld\n", cam_data.param.scene_token.data(), 
-                                cam_data.param.timestamp);
+  InitCamParams(
+    cam_data.param.cams2ego_rot, cam_data.param.cams2ego_trans, cam_data.param.cams_intrin);
 
-    auto start = high_resolution_clock::now();
-    CHECK_CUDA(cudaMemcpy(trt_buffer_dev[buffer_map["images"]], cam_data.imgs_dev, 
-                trt_buffer_sizes[buffer_map["images"]], cudaMemcpyDeviceToDevice));
-
-    InitCamParams(cam_data.param.cams2ego_rot, 
-                cam_data.param.cams2ego_trans, 
-                cam_data.param.cams_intrin);
-
-    GetAdjBEVFeature(cam_data.param.scene_token, 
-                    cam_data.param.ego2global_rot, 
-                    cam_data.param.ego2global_trans);
-
-    if(!trt_context->executeV2(trt_buffer_dev)){ 
-        printf("BEVDet forward failing!\n");
-    }
+  GetAdjBEVFeature(
+    cam_data.param.scene_token, cam_data.param.ego2global_rot, cam_data.param.ego2global_trans);
 
-    adj_frame_ptr->saveFrameBuffer(trt_buffer_dev[buffer_map["curr_bevfeat"]],
-                                    cam_data.param.scene_token, 
-                                    cam_data.param.ego2global_rot, 
-                                    cam_data.param.ego2global_trans);
+  if (!trt_context->executeV2(trt_buffer_dev)) {
+    printf("BEVDet forward failing!\n");
+  }
 
+  adj_frame_ptr->saveFrameBuffer(
+    trt_buffer_dev[buffer_map["curr_bevfeat"]], cam_data.param.scene_token,
+    cam_data.param.ego2global_rot, cam_data.param.ego2global_trans);
 
-    auto end = high_resolution_clock::now();
+  auto end = high_resolution_clock::now();
 
-    postprocess_ptr->DoPostprocess(post_buffer, out_detections);
-    CHECK_CUDA(cudaDeviceSynchronize());
+  postprocess_ptr->DoPostprocess(post_buffer, out_detections);
+  CHECK_CUDA(cudaDeviceSynchronize());
 
-    auto post_end = high_resolution_clock::now();
+  auto post_end = high_resolution_clock::now();
 
-    duration<double> infer_t = post_end - start;
-    duration<double> engine_t = end - start;
-    duration<double> post_t = post_end - end;
+  duration<double> infer_t = post_end - start;
+  duration<double> engine_t = end - start;
+  duration<double> post_t = post_end - end;
 
-    cost_time = infer_t.count() * 1000;
-    printf("TRT-Engine  : %.5lf ms\n", engine_t.count() * 1000);
-    printf("Postprocess : %.5lf ms\n", post_t.count() * 1000);
-    printf("Inference   : %.5lf ms\n", infer_t.count() * 1000);
+  cost_time = infer_t.count() * 1000;
+  printf("TRT-Engine  : %.5lf ms\n", engine_t.count() * 1000);
+  printf("Postprocess : %.5lf ms\n", post_t.count() * 1000);
+  printf("Inference   : %.5lf ms\n", infer_t.count() * 1000);
 
-    printf("Detect %ld objects\n", out_detections.size());
+  printf("Detect %ld objects\n", out_detections.size());
 
-    return EXIT_SUCCESS;
+  return EXIT_SUCCESS;
 }
 
-void BEVDet::ExportEngine(const std::string &onnxFile, const std::string &trtFile){
-    CHECK_CUDA(cudaSetDevice(0));
-    nvinfer1::ICudaEngine *engine = nullptr;
-    std::vector<nvinfer1::Dims32> min_shapes{
-    {4, {6, 3, 900, 400}},
-    {1, {3}},
-    {1, {3}},
-    {3, {1, 6, 27}},
-    {1, {200000}},
-    {1, {200000}},
-    {1, {200000}},
-    {1, {8000}},
-    {1, {8000}},
-    {5, {1, 8, 80, 128, 128}},
-    {3, {1, 8, 6}},
-    {2, {1, 1}}
-    };
-
-    std::vector<nvinfer1::Dims32> opt_shapes{
-        {4, {6, 3, 900, 400}},
-        {1, {3}},
-        {1, {3}},
-        {3, {1, 6, 27}},
-        {1, {356760}},
-        {1, {356760}},
-        {1, {356760}},
-        {1, {13360}},
-        {1, {13360}},
-        {5, {1, 8, 80, 128, 128}},
-        {3, {1, 8, 6}},
-        {2, {1, 1}}
-    };
-
-
-    std::vector<nvinfer1::Dims32> max_shapes{
-        {4, {6, 3, 900, 400}},
-        {1, {3}},
-        {1, {3}},
-        {3, {1, 6, 27}},
-        {1, {370000}},
-        {1, {370000}},
-        {1, {370000}},
-        {1, {14000}},
-        {1, {14000}},
-        {5, {1, 8, 80, 128, 128}},
-        {3, {1, 8, 6}},
-        {2, {1, 1}}
-    };
-    nvinfer1::IBuilder *            builder = nvinfer1::createInferBuilder(g_logger);
-    nvinfer1::INetworkDefinition *  network = builder->createNetworkV2(1U << int(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH));
-    nvinfer1::IOptimizationProfile *profile = builder->createOptimizationProfile();
-    nvinfer1::IBuilderConfig *      config  = builder->createBuilderConfig();
-    config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, 3UL << 32UL);
-        
-    config->setFlag(nvinfer1::BuilderFlag::kFP16);
-
-    nvonnxparser::IParser *parser = nvonnxparser::createParser(*network, g_logger);
-
-
-    if (!parser->parseFromFile(onnxFile.c_str(), int(g_logger.reportable_severity)))
-    {
-        std::cout << std::string("Failed parsing .onnx file!") << std::endl;
-        for (int i = 0; i < parser->getNbErrors(); ++i)
-        {
-            auto *error = parser->getError(i);
-            std::cout << std::to_string(int(error->code())) << std::string(":") << std::string(error->desc()) << std::endl;
-        }
-
-        return;
+void BEVDet::ExportEngine(const std::string & onnxFile, const std::string & trtFile)
+{
+  CHECK_CUDA(cudaSetDevice(0));
+  nvinfer1::ICudaEngine * engine = nullptr;
+  std::vector<nvinfer1::Dims32> min_shapes{
+    {4, {6, 3, 900, 400}}, {1, {3}},      {1, {3}},    {3, {1, 6, 27}}, {1, {200000}},
+    {1, {200000}},         {1, {200000}}, {1, {8000}}, {1, {8000}},     {5, {1, 8, 80, 128, 128}},
+    {3, {1, 8, 6}},        {2, {1, 1}}};
+
+  std::vector<nvinfer1::Dims32> opt_shapes{
+    {4, {6, 3, 900, 400}}, {1, {3}},      {1, {3}},     {3, {1, 6, 27}}, {1, {356760}},
+    {1, {356760}},         {1, {356760}}, {1, {13360}}, {1, {13360}},    {5, {1, 8, 80, 128, 128}},
+    {3, {1, 8, 6}},        {2, {1, 1}}};
+
+  std::vector<nvinfer1::Dims32> max_shapes{
+    {4, {6, 3, 900, 400}}, {1, {3}},      {1, {3}},     {3, {1, 6, 27}}, {1, {370000}},
+    {1, {370000}},         {1, {370000}}, {1, {14000}}, {1, {14000}},    {5, {1, 8, 80, 128, 128}},
+    {3, {1, 8, 6}},        {2, {1, 1}}};
+  nvinfer1::IBuilder * builder = nvinfer1::createInferBuilder(g_logger);
+  nvinfer1::INetworkDefinition * network =
+    builder->createNetworkV2(1U << int(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH));
+  nvinfer1::IOptimizationProfile * profile = builder->createOptimizationProfile();
+  nvinfer1::IBuilderConfig * config = builder->createBuilderConfig();
+  config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, 3UL << 32UL);
+
+  config->setFlag(nvinfer1::BuilderFlag::kFP16);
+
+  nvonnxparser::IParser * parser = nvonnxparser::createParser(*network, g_logger);
+
+  if (!parser->parseFromFile(onnxFile.c_str(), int(g_logger.reportable_severity))) {
+    std::cout << std::string("Failed parsing .onnx file!") << std::endl;
+    for (int i = 0; i < parser->getNbErrors(); ++i) {
+      auto * error = parser->getError(i);
+      std::cout << std::to_string(int(error->code())) << std::string(":")
+                << std::string(error->desc()) << std::endl;
     }
-    std::cout << std::string("Succeeded parsing .onnx file!") << std::endl;
 
-    for(size_t i = 0; i < min_shapes.size(); i++){
-        nvinfer1::ITensor *it = network->getInput(i);
-        profile->setDimensions(it->getName(), nvinfer1::OptProfileSelector::kMIN, min_shapes[i]);
-        profile->setDimensions(it->getName(), nvinfer1::OptProfileSelector::kOPT, opt_shapes[i]);
-        profile->setDimensions(it->getName(), nvinfer1::OptProfileSelector::kMAX, max_shapes[i]);
-    }
-    config->addOptimizationProfile(profile);
+    return;
+  }
+  std::cout << std::string("Succeeded parsing .onnx file!") << std::endl;
+
+  for (size_t i = 0; i < min_shapes.size(); i++) {
+    nvinfer1::ITensor * it = network->getInput(i);
+    profile->setDimensions(it->getName(), nvinfer1::OptProfileSelector::kMIN, min_shapes[i]);
+    profile->setDimensions(it->getName(), nvinfer1::OptProfileSelector::kOPT, opt_shapes[i]);
+    profile->setDimensions(it->getName(), nvinfer1::OptProfileSelector::kMAX, max_shapes[i]);
+  }
+  config->addOptimizationProfile(profile);
+
+  nvinfer1::IHostMemory * engineString = builder->buildSerializedNetwork(*network, *config);
+  if (engineString == nullptr || engineString->size() == 0) {
+    std::cout << "Failed building serialized engine!" << std::endl;
+    return;
+  }
+  std::cout << "Succeeded building serialized engine!" << std::endl;
 
-    nvinfer1::IHostMemory *engineString = builder->buildSerializedNetwork(*network, *config);
-    if (engineString == nullptr || engineString->size() == 0)
-    {
-        std::cout << "Failed building serialized engine!" << std::endl;
-        return;
-    }
-    std::cout << "Succeeded building serialized engine!" << std::endl;
-
-    nvinfer1::IRuntime *runtime {nvinfer1::createInferRuntime(g_logger)};
-    engine = runtime->deserializeCudaEngine(engineString->data(), engineString->size());
-    if (engine == nullptr)
-    {
-        std::cout << "Failed building engine!" << std::endl;
-        return;
-    }
-    std::cout << "Succeeded building engine!" << std::endl;
+  nvinfer1::IRuntime * runtime{nvinfer1::createInferRuntime(g_logger)};
+  engine = runtime->deserializeCudaEngine(engineString->data(), engineString->size());
+  if (engine == nullptr) {
+    std::cout << "Failed building engine!" << std::endl;
+    return;
+  }
+  std::cout << "Succeeded building engine!" << std::endl;
 
-    std::ofstream engineFile(trtFile, std::ios::binary);
-    if (!engineFile)
-    {
-        std::cout << "Failed opening file to write" << std::endl;
-        return;
-    }
-    engineFile.write(static_cast<char *>(engineString->data()), engineString->size());
-    if (engineFile.fail())
-    {
-        std::cout << "Failed saving .engine file!" << std::endl;
-        return;
-    }
-    std::cout << "Succeeded saving .engine file!" << std::endl;
+  std::ofstream engineFile(trtFile, std::ios::binary);
+  if (!engineFile) {
+    std::cout << "Failed opening file to write" << std::endl;
+    return;
+  }
+  engineFile.write(static_cast<char *>(engineString->data()), engineString->size());
+  if (engineFile.fail()) {
+    std::cout << "Failed saving .engine file!" << std::endl;
+    return;
+  }
+  std::cout << "Succeeded saving .engine file!" << std::endl;
 }
 
-BEVDet::~BEVDet(){
-
-    for(int i = 0; i < trt_engine->getNbBindings(); i++){
-        CHECK_CUDA(cudaFree(trt_buffer_dev[i]));
-    }
-    delete[] trt_buffer_dev;
-    delete[] post_buffer;
+BEVDet::~BEVDet()
+{
+  for (int i = 0; i < trt_engine->getNbBindings(); i++) {
+    CHECK_CUDA(cudaFree(trt_buffer_dev[i]));
+  }
+  delete[] trt_buffer_dev;
+  delete[] post_buffer;
 
-    delete[] cam_params_host;
+  delete[] cam_params_host;
 
-    trt_context->destroy();
-    trt_engine->destroy();
-}
\ No newline at end of file
+  trt_context->destroy();
+  trt_engine->destroy();
+}
diff --git a/perception/tensorrt_bevdet/src/bevdet_node.cpp b/perception/tensorrt_bevdet/src/bevdet_node.cpp
index f7c34d65f123f..8d2acc87c848f 100644
--- a/perception/tensorrt_bevdet/src/bevdet_node.cpp
+++ b/perception/tensorrt_bevdet/src/bevdet_node.cpp
@@ -1,19 +1,10 @@
 #include "bevdet_node.hpp"
 
 using Label = autoware_perception_msgs::msg::ObjectClassification;
-std::map< int, std::vector<int>> colormap { 
-            {0, {0, 0, 255}},  // dodger blue 
-            {1, {0, 201, 87}},   
-            {2, {0, 201, 87}},
-            {3, {160, 32, 240}},
-            {4, {3, 168, 158}},
-            {5, {255, 0, 0}},
-            {6, {255, 97, 0}},
-            {7, {30,  0, 255}},
-            {8, {255, 0, 0}},
-            {9, {0, 0, 255}},
-            {10, {0, 0, 0}}
-};
+std::map<int, std::vector<int>> colormap{
+  {0, {0, 0, 255}},  // dodger blue
+  {1, {0, 201, 87}}, {2, {0, 201, 87}}, {3, {160, 32, 240}}, {4, {3, 168, 158}}, {5, {255, 0, 0}},
+  {6, {255, 97, 0}}, {7, {30, 0, 255}}, {8, {255, 0, 0}},    {9, {0, 0, 255}},   {10, {0, 0, 0}}};
 
 uint8_t getSemanticType(const std::string & class_name)
 {
@@ -36,224 +27,226 @@ uint8_t getSemanticType(const std::string & class_name)
   }
 }
 
-void Getinfo(void) 
+void Getinfo(void)
 {
-    cudaDeviceProp prop;
-
-    int count = 0;
-    cudaGetDeviceCount(&count);
-    printf("\nGPU has cuda devices: %d\n", count);
-    for (int i = 0; i < count; ++i) {
-        cudaGetDeviceProperties(&prop, i);
-        printf("----device id: %d info----\n", i);
-        printf("  GPU : %s \n", prop.name);
-        printf("  Capbility: %d.%d\n", prop.major, prop.minor);
-        printf("  Global memory: %luMB\n", prop.totalGlobalMem >> 20);
-        printf("  Const memory: %luKB\n", prop.totalConstMem >> 10);
-        printf("  Shared memory in a block: %luKB\n", prop.sharedMemPerBlock >> 10);
-        printf("  warp size: %d\n", prop.warpSize);
-        printf("  threads in a block: %d\n", prop.maxThreadsPerBlock);
-        printf("  block dim: (%d,%d,%d)\n", prop.maxThreadsDim[0],
-                prop.maxThreadsDim[1], prop.maxThreadsDim[2]);
-        printf("  grid dim: (%d,%d,%d)\n", prop.maxGridSize[0], prop.maxGridSize[1],
-                prop.maxGridSize[2]);
-    }
-    printf("\n");
+  cudaDeviceProp prop;
+
+  int count = 0;
+  cudaGetDeviceCount(&count);
+  printf("\nGPU has cuda devices: %d\n", count);
+  for (int i = 0; i < count; ++i) {
+    cudaGetDeviceProperties(&prop, i);
+    printf("----device id: %d info----\n", i);
+    printf("  GPU : %s \n", prop.name);
+    printf("  Capbility: %d.%d\n", prop.major, prop.minor);
+    printf("  Global memory: %luMB\n", prop.totalGlobalMem >> 20);
+    printf("  Const memory: %luKB\n", prop.totalConstMem >> 10);
+    printf("  Shared memory in a block: %luKB\n", prop.sharedMemPerBlock >> 10);
+    printf("  warp size: %d\n", prop.warpSize);
+    printf("  threads in a block: %d\n", prop.maxThreadsPerBlock);
+    printf(
+      "  block dim: (%d,%d,%d)\n", prop.maxThreadsDim[0], prop.maxThreadsDim[1],
+      prop.maxThreadsDim[2]);
+    printf(
+      "  grid dim: (%d,%d,%d)\n", prop.maxGridSize[0], prop.maxGridSize[1], prop.maxGridSize[2]);
+  }
+  printf("\n");
 }
 
-void box3DToDetectedObjects(const std::vector<Box>& boxes, 
-                    autoware_perception_msgs::msg::DetectedObjects & bevdet_objects,
-                    const std::vector<std::string> & class_names,
-                    float score_thre, const bool has_twist=true)
+void box3DToDetectedObjects(
+  const std::vector<Box> & boxes, autoware_perception_msgs::msg::DetectedObjects & bevdet_objects,
+  const std::vector<std::string> & class_names, float score_thre, const bool has_twist = true)
 {
-    for(auto b : boxes)
-    {   
-        if(b.score < score_thre)
-            continue;
-        autoware_perception_msgs::msg::DetectedObject obj;
-
-        Eigen::Vector3f center(b.x, b.y, b.z + b.h/2.);
-
-        obj.existence_probability = b.score;
-        // classification
-        autoware_perception_msgs::msg::ObjectClassification classification;
-        classification.probability = 1.0f;
-        if (b.label >= 0 && static_cast<size_t>(b.label) < class_names.size()) {
-            classification.label = getSemanticType(class_names[b.label]);
-        } else {
-            classification.label = Label::UNKNOWN;
-        }
-        obj.classification.emplace_back(classification);
-
-        // pose and shape
-        geometry_msgs::msg::Point p;
-        p.x = center.x();
-        p.y = center.y();
-        p.z = center.z();
-        obj.kinematics.pose_with_covariance.pose.position = p;
-
-        tf2::Quaternion q;
-        q.setRPY(0, 0, b.r);
-        obj.kinematics.pose_with_covariance.pose.orientation = tf2::toMsg(q);
-
-        obj.shape.type = autoware_perception_msgs::msg::Shape::BOUNDING_BOX;
-
-        geometry_msgs::msg::Vector3 v;
-        v.x = b.l;
-        v.y = b.w;
-        v.z = b.h;
-        obj.shape.dimensions = v;
-        if (has_twist) {
-          float vel_x = b.vx;
-          float vel_y = b.vy;
-          geometry_msgs::msg::Twist twist;
-          twist.linear.x = std::sqrt(std::pow(vel_x, 2) + std::pow(vel_y, 2));
-          twist.angular.z = 2 * (std::atan2(vel_y, vel_x) - b.r);
-          obj.kinematics.twist_with_covariance.twist = twist;
-          obj.kinematics.has_twist = has_twist;
-        }
-
-        bevdet_objects.objects.emplace_back(obj);
-
+  for (auto b : boxes) {
+    if (b.score < score_thre) continue;
+    autoware_perception_msgs::msg::DetectedObject obj;
+
+    Eigen::Vector3f center(b.x, b.y, b.z + b.h / 2.);
+
+    obj.existence_probability = b.score;
+    // classification
+    autoware_perception_msgs::msg::ObjectClassification classification;
+    classification.probability = 1.0f;
+    if (b.label >= 0 && static_cast<size_t>(b.label) < class_names.size()) {
+      classification.label = getSemanticType(class_names[b.label]);
+    } else {
+      classification.label = Label::UNKNOWN;
     }
+    obj.classification.emplace_back(classification);
+
+    // pose and shape
+    geometry_msgs::msg::Point p;
+    p.x = center.x();
+    p.y = center.y();
+    p.z = center.z();
+    obj.kinematics.pose_with_covariance.pose.position = p;
+
+    tf2::Quaternion q;
+    q.setRPY(0, 0, b.r);
+    obj.kinematics.pose_with_covariance.pose.orientation = tf2::toMsg(q);
+
+    obj.shape.type = autoware_perception_msgs::msg::Shape::BOUNDING_BOX;
+
+    geometry_msgs::msg::Vector3 v;
+    v.x = b.l;
+    v.y = b.w;
+    v.z = b.h;
+    obj.shape.dimensions = v;
+    if (has_twist) {
+      float vel_x = b.vx;
+      float vel_y = b.vy;
+      geometry_msgs::msg::Twist twist;
+      twist.linear.x = std::sqrt(std::pow(vel_x, 2) + std::pow(vel_y, 2));
+      twist.angular.z = 2 * (std::atan2(vel_y, vel_x) - b.r);
+      obj.kinematics.twist_with_covariance.twist = twist;
+      obj.kinematics.has_twist = has_twist;
+    }
+
+    bevdet_objects.objects.emplace_back(obj);
+  }
 }
 
-TRTBEVDetNode::TRTBEVDetNode(const std::string & node_name, const rclcpp::NodeOptions & node_options)
+TRTBEVDetNode::TRTBEVDetNode(
+  const std::string & node_name, const rclcpp::NodeOptions & node_options)
 : rclcpp::Node(node_name, node_options)
 {
-    using std::placeholders::_1;
-    using std::placeholders::_2;
-    using std::placeholders::_3;
-    using std::placeholders::_4;
-    using std::placeholders::_5;
-    using std::placeholders::_6;
-    using std::placeholders::_7;
-
-    score_thre_ = this->declare_parameter<float>("post_process_params.score_thre", 0.2);
-
-    img_N_ = this->declare_parameter<int>("data_params.N", 6); // camera num 6
-    img_w_ = this->declare_parameter<int>("data_params.W", 1600); // W: 1600
-    img_h_ = this->declare_parameter<int>("data_params.H", 900); // H: 900
-     
-    model_config_ = this->declare_parameter("model_config", "bevdet_r50_4dlongterm_depth.yaml");
-    
-    onnx_file_ = this->declare_parameter("onnx_file", "bevdet_one_lt_d.onnx");
-    engine_file_ = this->declare_parameter("engine_file", "bevdet_one_lt_d.engine");
-    
-    camconfig_ = YAML::LoadFile(this->declare_parameter("cam_config", "cams_param.yaml")); 
-    
-    imgs_name_ = this->declare_parameter<std::vector<std::string>>("data_params.cams");
-    class_names_ = this->declare_parameter<std::vector<std::string>>("post_process_params.class_names");
-  
-    RCLCPP_INFO_STREAM(this->get_logger(), "Successful load config!");
-    
-    sampleData_.param = camParams(camconfig_, img_N_, imgs_name_);
-
-    RCLCPP_INFO_STREAM(this->get_logger(), "Successful load image params!");
-
-    bevdet_ = std::make_shared<BEVDet>(model_config_, img_N_, sampleData_.param.cams_intrin, 
-                sampleData_.param.cams2ego_rot, sampleData_.param.cams2ego_trans, onnx_file_, 
-                                                    engine_file_);
-    
-    RCLCPP_INFO_STREAM(this->get_logger(), "Successful create bevdet!");
-    
-    CHECK_CUDA(cudaMalloc((void**)&imgs_dev_, img_N_ * 3 * img_w_ * img_h_ * sizeof(uchar)));
-
-    pub_cloud_ = this ->create_publisher<sensor_msgs::msg::PointCloud2>("~/output/painting_cloud", rclcpp::SensorDataQoS()); 
-    pub_boxes_ = this ->create_publisher<autoware_perception_msgs::msg::DetectedObjects>("~/output/boxes", rclcpp::QoS{1});   
-    
-    sub_cloud_.subscribe(this, "~/input/topic_cloud", rclcpp::QoS{1}.get_rmw_qos_profile());//rclcpp::QoS{1}.get_rmw_qos_profile()
-    sub_f_img_.subscribe(this, "~/input/topic_img_f", rclcpp::QoS{1}.get_rmw_qos_profile());//rmw_qos_profile_sensor_data
-    sub_b_img_.subscribe(this, "~/input/topic_img_b", rclcpp::QoS{1}.get_rmw_qos_profile());
-
-    sub_fl_img_.subscribe(this, "~/input/topic_img_fl", rclcpp::QoS{1}.get_rmw_qos_profile());
-    sub_fr_img_.subscribe(this, "~/input/topic_img_fr", rclcpp::QoS{1}.get_rmw_qos_profile());
-    
-    sub_bl_img_.subscribe(this, "~/input/topic_img_bl", rclcpp::QoS{1}.get_rmw_qos_profile());
-    sub_br_img_.subscribe(this, "~/input/topic_img_br", rclcpp::QoS{1}.get_rmw_qos_profile());
-  
-    sync_ = std::make_shared<Sync>( MySyncPolicy(10), sub_cloud_, 
-    sub_fl_img_, sub_f_img_, sub_fr_img_,
-    sub_bl_img_ ,sub_b_img_, sub_br_img_); 
-  
-    sync_->registerCallback(std::bind(&TRTBEVDetNode::callback,this, _1, _2, _3, _4, _5, _6,_7)); // 绑定回调函数
-  
-}
+  using std::placeholders::_1;
+  using std::placeholders::_2;
+  using std::placeholders::_3;
+  using std::placeholders::_4;
+  using std::placeholders::_5;
+  using std::placeholders::_6;
+  using std::placeholders::_7;
+
+  score_thre_ = this->declare_parameter<float>("post_process_params.score_thre", 0.2);
+
+  img_N_ = this->declare_parameter<int>("data_params.N", 6);     // camera num 6
+  img_w_ = this->declare_parameter<int>("data_params.W", 1600);  // W: 1600
+  img_h_ = this->declare_parameter<int>("data_params.H", 900);   // H: 900
+
+  model_config_ = this->declare_parameter("model_config", "bevdet_r50_4dlongterm_depth.yaml");
+
+  onnx_file_ = this->declare_parameter("onnx_file", "bevdet_one_lt_d.onnx");
+  engine_file_ = this->declare_parameter("engine_file", "bevdet_one_lt_d.engine");
+
+  camconfig_ = YAML::LoadFile(this->declare_parameter("cam_config", "cams_param.yaml"));
+
+  imgs_name_ = this->declare_parameter<std::vector<std::string>>("data_params.cams");
+  class_names_ =
+    this->declare_parameter<std::vector<std::string>>("post_process_params.class_names");
+
+  RCLCPP_INFO_STREAM(this->get_logger(), "Successful load config!");
+
+  sampleData_.param = camParams(camconfig_, img_N_, imgs_name_);
+
+  RCLCPP_INFO_STREAM(this->get_logger(), "Successful load image params!");
+
+  bevdet_ = std::make_shared<BEVDet>(
+    model_config_, img_N_, sampleData_.param.cams_intrin, sampleData_.param.cams2ego_rot,
+    sampleData_.param.cams2ego_trans, onnx_file_, engine_file_);
+
+  RCLCPP_INFO_STREAM(this->get_logger(), "Successful create bevdet!");
+
+  CHECK_CUDA(cudaMalloc((void **)&imgs_dev_, img_N_ * 3 * img_w_ * img_h_ * sizeof(uchar)));
+
+  pub_cloud_ = this->create_publisher<sensor_msgs::msg::PointCloud2>(
+    "~/output/painting_cloud", rclcpp::SensorDataQoS());
+  pub_boxes_ = this->create_publisher<autoware_perception_msgs::msg::DetectedObjects>(
+    "~/output/boxes", rclcpp::QoS{1});
+
+  sub_cloud_.subscribe(
+    this, "~/input/topic_cloud",
+    rclcpp::QoS{1}.get_rmw_qos_profile());  // rclcpp::QoS{1}.get_rmw_qos_profile()
+  sub_f_img_.subscribe(
+    this, "~/input/topic_img_f",
+    rclcpp::QoS{1}.get_rmw_qos_profile());  // rmw_qos_profile_sensor_data
+  sub_b_img_.subscribe(this, "~/input/topic_img_b", rclcpp::QoS{1}.get_rmw_qos_profile());
 
+  sub_fl_img_.subscribe(this, "~/input/topic_img_fl", rclcpp::QoS{1}.get_rmw_qos_profile());
+  sub_fr_img_.subscribe(this, "~/input/topic_img_fr", rclcpp::QoS{1}.get_rmw_qos_profile());
 
-void TRTBEVDetNode::callback(const sensor_msgs::msg::PointCloud2::ConstSharedPtr & msg_cloud, 
-    const sensor_msgs::msg::Image::ConstSharedPtr & msg_fl_img,
-    const sensor_msgs::msg::Image::ConstSharedPtr & msg_f_img,
-    const sensor_msgs::msg::Image::ConstSharedPtr & msg_fr_img,
-    const sensor_msgs::msg::Image::ConstSharedPtr & msg_bl_img,
-    const sensor_msgs::msg::Image::ConstSharedPtr & msg_b_img,
-    const sensor_msgs::msg::Image::ConstSharedPtr & msg_br_img)
-{   
-    
-    pcl::PointCloud<PointT>::Ptr cloud(new pcl::PointCloud<PointT>);
-    
-    pcl::fromROSMsg(*msg_cloud, *cloud);
-   
-    cv::Mat img_fl, img_f, img_fr, img_bl, img_b, img_br;
-    std::vector<cv::Mat> imgs;
-    img_fl = cv_bridge::toCvShare(msg_fl_img , "bgr8")->image;
-    img_f  = cv_bridge::toCvShare(msg_f_img, "bgr8")->image;
-    img_fr = cv_bridge::toCvShare(msg_fr_img, "bgr8")->image;
-    img_bl = cv_bridge::toCvShare(msg_bl_img , "bgr8")->image;
-    img_b  = cv_bridge::toCvShare(msg_b_img, "bgr8")->image;
-    img_br = cv_bridge::toCvShare(msg_br_img, "bgr8")->image;
-
-    imgs.emplace_back(img_fl);
-    imgs.emplace_back(img_f);
-    imgs.emplace_back(img_fr);
-    imgs.emplace_back(img_bl);
-    imgs.emplace_back(img_b);
-    imgs.emplace_back(img_br);
-
-    std::vector<std::vector<char>> imgs_data;
-    // Mat -> Vetor<char>  
-    cvImgToArr(imgs, imgs_data);
-    
-    // cpu imgs_data -> gpu imgs_dev
-    decode_cpu(imgs_data, imgs_dev_, img_w_, img_h_);
-
-    // uchar *imgs_dev
-    sampleData_.imgs_dev = imgs_dev_;
-
-    std::vector<Box> ego_boxes;
-    ego_boxes.clear();
-    float time = 0.f;
-    
-    bevdet_->DoInfer(sampleData_, ego_boxes, time);
-    
-    autoware_perception_msgs::msg::DetectedObjects bevdet_objects;
-    bevdet_objects.header.frame_id = "base_link";
-    bevdet_objects.header.stamp = msg_cloud->header.stamp;
-    
-    box3DToDetectedObjects(ego_boxes, bevdet_objects, class_names_, score_thre_);
-    
-    pub_boxes_ -> publish(bevdet_objects);
-    
-    sensor_msgs::msg::PointCloud2 msg_cloud_painting;
-    pcl::toROSMsg(*cloud, msg_cloud_painting);
-
-    msg_cloud_painting.header.frame_id = msg_cloud->header.frame_id;
-    msg_cloud_painting.header.stamp = msg_cloud->header.stamp;
-    pub_cloud_ -> publish(msg_cloud_painting);
+  sub_bl_img_.subscribe(this, "~/input/topic_img_bl", rclcpp::QoS{1}.get_rmw_qos_profile());
+  sub_br_img_.subscribe(this, "~/input/topic_img_br", rclcpp::QoS{1}.get_rmw_qos_profile());
+
+  sync_ = std::make_shared<Sync>(
+    MySyncPolicy(10), sub_cloud_, sub_fl_img_, sub_f_img_, sub_fr_img_, sub_bl_img_, sub_b_img_,
+    sub_br_img_);
+
+  sync_->registerCallback(
+    std::bind(&TRTBEVDetNode::callback, this, _1, _2, _3, _4, _5, _6, _7));  // 绑定回调函数
 }
 
+void TRTBEVDetNode::callback(
+  const sensor_msgs::msg::PointCloud2::ConstSharedPtr & msg_cloud,
+  const sensor_msgs::msg::Image::ConstSharedPtr & msg_fl_img,
+  const sensor_msgs::msg::Image::ConstSharedPtr & msg_f_img,
+  const sensor_msgs::msg::Image::ConstSharedPtr & msg_fr_img,
+  const sensor_msgs::msg::Image::ConstSharedPtr & msg_bl_img,
+  const sensor_msgs::msg::Image::ConstSharedPtr & msg_b_img,
+  const sensor_msgs::msg::Image::ConstSharedPtr & msg_br_img)
+{
+  pcl::PointCloud<PointT>::Ptr cloud(new pcl::PointCloud<PointT>);
+
+  pcl::fromROSMsg(*msg_cloud, *cloud);
+
+  cv::Mat img_fl, img_f, img_fr, img_bl, img_b, img_br;
+  std::vector<cv::Mat> imgs;
+  img_fl = cv_bridge::toCvShare(msg_fl_img, "bgr8")->image;
+  img_f = cv_bridge::toCvShare(msg_f_img, "bgr8")->image;
+  img_fr = cv_bridge::toCvShare(msg_fr_img, "bgr8")->image;
+  img_bl = cv_bridge::toCvShare(msg_bl_img, "bgr8")->image;
+  img_b = cv_bridge::toCvShare(msg_b_img, "bgr8")->image;
+  img_br = cv_bridge::toCvShare(msg_br_img, "bgr8")->image;
+
+  imgs.emplace_back(img_fl);
+  imgs.emplace_back(img_f);
+  imgs.emplace_back(img_fr);
+  imgs.emplace_back(img_bl);
+  imgs.emplace_back(img_b);
+  imgs.emplace_back(img_br);
+
+  std::vector<std::vector<char>> imgs_data;
+  // Mat -> Vetor<char>
+  cvImgToArr(imgs, imgs_data);
+
+  // cpu imgs_data -> gpu imgs_dev
+  decode_cpu(imgs_data, imgs_dev_, img_w_, img_h_);
+
+  // uchar *imgs_dev
+  sampleData_.imgs_dev = imgs_dev_;
+
+  std::vector<Box> ego_boxes;
+  ego_boxes.clear();
+  float time = 0.f;
+
+  bevdet_->DoInfer(sampleData_, ego_boxes, time);
+
+  autoware_perception_msgs::msg::DetectedObjects bevdet_objects;
+  bevdet_objects.header.frame_id = "base_link";
+  bevdet_objects.header.stamp = msg_cloud->header.stamp;
+
+  box3DToDetectedObjects(ego_boxes, bevdet_objects, class_names_, score_thre_);
+
+  pub_boxes_->publish(bevdet_objects);
+
+  sensor_msgs::msg::PointCloud2 msg_cloud_painting;
+  pcl::toROSMsg(*cloud, msg_cloud_painting);
+
+  msg_cloud_painting.header.frame_id = msg_cloud->header.frame_id;
+  msg_cloud_painting.header.stamp = msg_cloud->header.stamp;
+  pub_cloud_->publish(msg_cloud_painting);
+}
 
 TRTBEVDetNode::~TRTBEVDetNode()
 {
-    delete imgs_dev_;
+  delete imgs_dev_;
 }
 
-
-int main(int argc, char **argv)
-{   
-    rclcpp::init(argc, argv);
-    rclcpp::NodeOptions node_options;
-    auto bevdet_node = std::make_shared<TRTBEVDetNode>("tensorrt_bevdet_node", node_options);
-    rclcpp::spin(bevdet_node);
-    return 0;
-}
\ No newline at end of file
+int main(int argc, char ** argv)
+{
+  rclcpp::init(argc, argv);
+  rclcpp::NodeOptions node_options;
+  auto bevdet_node = std::make_shared<TRTBEVDetNode>("tensorrt_bevdet_node", node_options);
+  rclcpp::spin(bevdet_node);
+  return 0;
+}
diff --git a/perception/tensorrt_bevdet/src/bevpool_plugin.cu b/perception/tensorrt_bevdet/src/bevpool_plugin.cu
index 8654b6c3bf688..9b98cba128c29 100644
--- a/perception/tensorrt_bevdet/src/bevpool_plugin.cu
+++ b/perception/tensorrt_bevdet/src/bevpool_plugin.cu
@@ -16,6 +16,7 @@
 
 #include "bevpool_plugin.hpp"
 #include "common.hpp"
+
 #include <cuda_fp16.h>
 #include <cuda_runtime_api.h>
 
@@ -27,346 +28,341 @@
 // input[5] interval_starts  k
 // input[6] interval_lengths k
 
-// out[0]   bevfeat          b x c x h x w    
-
-template<typename T1, typename T2>
-__global__ void bev_pool_v2_kernel(int channel, 
-                                    int n_intervals,
-                                    int map_size,
-                                    int img_size,
-                                    const T1 *__restrict__ depth,
-                                    const T1 *__restrict__ feat,
-                                    const int *__restrict__ ranks_depth,
-                                    const int *__restrict__ ranks_feat,
-                                    const int *__restrict__ ranks_bev,
-                                    const int *__restrict__ interval_starts,
-                                    const int *__restrict__ interval_lengths,
-                                    T2 * __restrict__ out) {
-    CUDA_1D_KERNEL_LOOP(idx, n_intervals * channel){
-        int index = idx / channel;    // bev grid index
-        int curr_c = idx % channel;    // channel index
-        int interval_start = interval_starts[index];  
-        int interval_length = interval_lengths[index];  
-
-        int curr_step = curr_c * img_size;
-        int chan_step = channel * img_size;
-
-        T2 sum = 0;
-
-        int feat_offset = 0;
-        for(int i = 0; i < interval_length; i++){
-            feat_offset = ranks_feat[interval_start + i] / img_size * chan_step + 
-                          curr_step + ranks_feat[interval_start + i] % img_size;
-  
-            sum += static_cast<T2>(feat[feat_offset]) * static_cast<T2>(depth[ranks_depth[interval_start + i]]);
-        }
-        out[curr_c * map_size + ranks_bev[interval_start]] = sum;
+// out[0]   bevfeat          b x c x h x w
+
+template <typename T1, typename T2>
+__global__ void bev_pool_v2_kernel(
+  int channel, int n_intervals, int map_size, int img_size, const T1 * __restrict__ depth,
+  const T1 * __restrict__ feat, const int * __restrict__ ranks_depth,
+  const int * __restrict__ ranks_feat, const int * __restrict__ ranks_bev,
+  const int * __restrict__ interval_starts, const int * __restrict__ interval_lengths,
+  T2 * __restrict__ out)
+{
+  CUDA_1D_KERNEL_LOOP(idx, n_intervals * channel)
+  {
+    int index = idx / channel;   // bev grid index
+    int curr_c = idx % channel;  // channel index
+    int interval_start = interval_starts[index];
+    int interval_length = interval_lengths[index];
+
+    int curr_step = curr_c * img_size;
+    int chan_step = channel * img_size;
+
+    T2 sum = 0;
+
+    int feat_offset = 0;
+    for (int i = 0; i < interval_length; i++) {
+      feat_offset = ranks_feat[interval_start + i] / img_size * chan_step + curr_step +
+                    ranks_feat[interval_start + i] % img_size;
+
+      sum += static_cast<T2>(feat[feat_offset]) *
+             static_cast<T2>(depth[ranks_depth[interval_start + i]]);
     }
+    out[curr_c * map_size + ranks_bev[interval_start]] = sum;
+  }
 }
 
 namespace nvinfer1
 {
 // class BEVPoolPlugin
-BEVPoolPlugin::BEVPoolPlugin(const std::string &name, int bev_h, int bev_w, int n):
-    name_(name){
-    m_.bev_h = bev_h;
-    m_.bev_w = bev_w;
-    m_.n = n;
+BEVPoolPlugin::BEVPoolPlugin(const std::string & name, int bev_h, int bev_w, int n) : name_(name)
+{
+  m_.bev_h = bev_h;
+  m_.bev_w = bev_w;
+  m_.n = n;
 }
 
-BEVPoolPlugin::BEVPoolPlugin(const std::string &name, const void *buffer, size_t length):
-    name_(name){
-    memcpy(&m_, buffer, sizeof(m_));
+BEVPoolPlugin::BEVPoolPlugin(const std::string & name, const void * buffer, size_t length)
+: name_(name)
+{
+  memcpy(&m_, buffer, sizeof(m_));
 }
 
-BEVPoolPlugin::~BEVPoolPlugin(){
+BEVPoolPlugin::~BEVPoolPlugin()
+{
 }
 
-IPluginV2DynamicExt *BEVPoolPlugin::clone() const noexcept {
-    auto p = new BEVPoolPlugin(name_, &m_, sizeof(m_));
-    p->setPluginNamespace(namespace_.c_str());
-    return p;
+IPluginV2DynamicExt * BEVPoolPlugin::clone() const noexcept
+{
+  auto p = new BEVPoolPlugin(name_, &m_, sizeof(m_));
+  p->setPluginNamespace(namespace_.c_str());
+  return p;
 }
 
-int32_t BEVPoolPlugin::getNbOutputs() const noexcept {
-    return 1;
+int32_t BEVPoolPlugin::getNbOutputs() const noexcept
+{
+  return 1;
 }
- 
-DataType BEVPoolPlugin::getOutputDataType(int32_t index, DataType const *inputTypes, 
-                                                                int32_t nbInputs) const noexcept {
-    return DataType::kFLOAT;
+
+DataType BEVPoolPlugin::getOutputDataType(
+  int32_t index, DataType const * inputTypes, int32_t nbInputs) const noexcept
+{
+  return DataType::kFLOAT;
 }
 
-DimsExprs BEVPoolPlugin::getOutputDimensions(int32_t outputIndex, const DimsExprs *inputs, 
-                                        int32_t nbInputs, IExprBuilder &exprBuilder) noexcept {
-    // input[0] depth            b*n x d x h x w
-    // input[1] feat             b*n x c x h x w
+DimsExprs BEVPoolPlugin::getOutputDimensions(
+  int32_t outputIndex, const DimsExprs * inputs, int32_t nbInputs,
+  IExprBuilder & exprBuilder) noexcept
+{
+  // input[0] depth            b*n x d x h x w
+  // input[1] feat             b*n x c x h x w
 
-    // input[2] ranks_depth      m
-    // input[3] ranks_feat       m
-    // input[4] ranks_bev        m
-    // input[5] interval_starts  k
-    // input[6] interval_lengths k
+  // input[2] ranks_depth      m
+  // input[3] ranks_feat       m
+  // input[4] ranks_bev        m
+  // input[5] interval_starts  k
+  // input[6] interval_lengths k
 
-    // out[0]   bevfeat          b x c x h x w    
+  // out[0]   bevfeat          b x c x h x w
 
-    DimsExprs ret;
-    ret.nbDims = inputs[1].nbDims;
-    ret.d[0] = exprBuilder.operation(DimensionOperation::kFLOOR_DIV, *inputs[1].d[0], *exprBuilder.constant(m_.n)); 
-    ret.d[1] = inputs[1].d[1];
-    ret.d[2] = exprBuilder.constant(m_.bev_h);
-    ret.d[3] = exprBuilder.constant(m_.bev_w);
-    
-    return ret; 
-}
+  DimsExprs ret;
+  ret.nbDims = inputs[1].nbDims;
+  ret.d[0] = exprBuilder.operation(
+    DimensionOperation::kFLOOR_DIV, *inputs[1].d[0], *exprBuilder.constant(m_.n));
+  ret.d[1] = inputs[1].d[1];
+  ret.d[2] = exprBuilder.constant(m_.bev_h);
+  ret.d[3] = exprBuilder.constant(m_.bev_w);
 
-bool BEVPoolPlugin::supportsFormatCombination(int32_t pos, const PluginTensorDesc *inOut,
-                                                    int32_t nbInputs, int32_t nbOutputs) noexcept {
-    // depth
-    if(pos == 0){
-        return (inOut[pos].type == DataType::kFLOAT || inOut[pos].type == DataType::kHALF) &&
-                 inOut[pos].format == TensorFormat::kLINEAR;
-    }
-    else if(pos == 1){ // feat
-        return inOut[0].type == inOut[1].type && inOut[pos].format == TensorFormat::kLINEAR;
-    }
-    else if(pos == 7){ // out
-        return (inOut[pos].type == DataType::kFLOAT || inOut[pos].type == DataType::kHALF) &&
-                inOut[pos].format == TensorFormat::kLINEAR;
-    }
-    else{
-        return inOut[pos].type == DataType::kINT32 && inOut[pos].format == TensorFormat::kLINEAR;
-    }
-    return false;
+  return ret;
 }
 
-void BEVPoolPlugin::configurePlugin(const DynamicPluginTensorDesc *in, int32_t nbInputs, 
-                                    const DynamicPluginTensorDesc *out, int32_t nbOutputs) noexcept {
-    return;
+bool BEVPoolPlugin::supportsFormatCombination(
+  int32_t pos, const PluginTensorDesc * inOut, int32_t nbInputs, int32_t nbOutputs) noexcept
+{
+  // depth
+  if (pos == 0) {
+    return (inOut[pos].type == DataType::kFLOAT || inOut[pos].type == DataType::kHALF) &&
+           inOut[pos].format == TensorFormat::kLINEAR;
+  } else if (pos == 1) {  // feat
+    return inOut[0].type == inOut[1].type && inOut[pos].format == TensorFormat::kLINEAR;
+  } else if (pos == 7) {  // out
+    return (inOut[pos].type == DataType::kFLOAT || inOut[pos].type == DataType::kHALF) &&
+           inOut[pos].format == TensorFormat::kLINEAR;
+  } else {
+    return inOut[pos].type == DataType::kINT32 && inOut[pos].format == TensorFormat::kLINEAR;
+  }
+  return false;
+}
+
+void BEVPoolPlugin::configurePlugin(
+  const DynamicPluginTensorDesc * in, int32_t nbInputs, const DynamicPluginTensorDesc * out,
+  int32_t nbOutputs) noexcept
+{
+  return;
 }
 
-size_t BEVPoolPlugin::getWorkspaceSize(const PluginTensorDesc *inputs, int32_t nbInputs, 
-                                const PluginTensorDesc *outputs, int32_t nbOutputs) const noexcept {
-    return 0;
+size_t BEVPoolPlugin::getWorkspaceSize(
+  const PluginTensorDesc * inputs, int32_t nbInputs, const PluginTensorDesc * outputs,
+  int32_t nbOutputs) const noexcept
+{
+  return 0;
 }
 
-int32_t BEVPoolPlugin::enqueue(const PluginTensorDesc *inputDesc, const PluginTensorDesc *outputDesc,
-    const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept {
-    // input[0] == depth            b*n x d x h x w
-    // input[1] == feat             b*n x c x h x w
-    // input[2] == ranks_depth      m
-    // input[3] == ranks_feat       m
-    // input[4] == ranks_bev        m
-    // input[5] == interval_starts  k
-    // input[6] == interval_lengths k
-
-    int channel = inputDesc[1].dims.d[1];
-    int n_intervals = inputDesc[5].dims.d[0];
-    int map_size = m_.bev_h * m_.bev_w;
-    int img_size = inputDesc[0].dims.d[2] * inputDesc[0].dims.d[3];
-
-    dim3 grid(GET_BLOCKS(n_intervals * channel));
-    dim3 block(NUM_THREADS);
-
-    // printf("BEVPool input depth %s\n", dataTypeToString(inputDesc[0].type).c_str());
-    // printf("BEVPool input  feat %s\n", dataTypeToString(inputDesc[1].type).c_str());
-    // printf("BEVPool output feat %s\n", dataTypeToString(outputDesc[0].type).c_str());
-
-    switch (int(outputDesc[0].type))
-    {
+int32_t BEVPoolPlugin::enqueue(
+  const PluginTensorDesc * inputDesc, const PluginTensorDesc * outputDesc,
+  const void * const * inputs, void * const * outputs, void * workspace,
+  cudaStream_t stream) noexcept
+{
+  // input[0] == depth            b*n x d x h x w
+  // input[1] == feat             b*n x c x h x w
+  // input[2] == ranks_depth      m
+  // input[3] == ranks_feat       m
+  // input[4] == ranks_bev        m
+  // input[5] == interval_starts  k
+  // input[6] == interval_lengths k
+
+  int channel = inputDesc[1].dims.d[1];
+  int n_intervals = inputDesc[5].dims.d[0];
+  int map_size = m_.bev_h * m_.bev_w;
+  int img_size = inputDesc[0].dims.d[2] * inputDesc[0].dims.d[3];
+
+  dim3 grid(GET_BLOCKS(n_intervals * channel));
+  dim3 block(NUM_THREADS);
+
+  // printf("BEVPool input depth %s\n", dataTypeToString(inputDesc[0].type).c_str());
+  // printf("BEVPool input  feat %s\n", dataTypeToString(inputDesc[1].type).c_str());
+  // printf("BEVPool output feat %s\n", dataTypeToString(outputDesc[0].type).c_str());
+
+  switch (int(outputDesc[0].type)) {
     case int(DataType::kFLOAT):
-        if(inputDesc[0].type == DataType::kFLOAT){
-            // printf("bevpool : fp32 fp32\n");
-            bev_pool_v2_kernel<float, float><<<grid, block, 0, stream>>>(
-                                                        channel, 
-                                                        n_intervals,
-                                                        map_size,
-                                                        img_size,
-                                                        reinterpret_cast<const float *>(inputs[0]),
-                                                        reinterpret_cast<const float *>(inputs[1]),
-                                                        reinterpret_cast<const int *>(inputs[2]),
-                                                        reinterpret_cast<const int *>(inputs[3]),
-                                                        reinterpret_cast<const int *>(inputs[4]),
-                                                        reinterpret_cast<const int *>(inputs[5]),
-                                                        reinterpret_cast<const int *>(inputs[6]),
-                                                        reinterpret_cast<float *>(outputs[0]));
-        }
-        else{
-            // printf("bevpool : fp16 fp32\n");
-            bev_pool_v2_kernel<__half, float><<<grid, block, 0, stream>>>(
-                                                        channel, 
-                                                        n_intervals,
-                                                        map_size,
-                                                        img_size,
-                                                        reinterpret_cast<const __half *>(inputs[0]),
-                                                        reinterpret_cast<const __half *>(inputs[1]),
-                                                        reinterpret_cast<const int *>(inputs[2]),
-                                                        reinterpret_cast<const int *>(inputs[3]),
-                                                        reinterpret_cast<const int *>(inputs[4]),
-                                                        reinterpret_cast<const int *>(inputs[5]),
-                                                        reinterpret_cast<const int *>(inputs[6]),
-                                                        reinterpret_cast<float *>(outputs[0]));
-        }
-        break;
+      if (inputDesc[0].type == DataType::kFLOAT) {
+        // printf("bevpool : fp32 fp32\n");
+        bev_pool_v2_kernel<float, float><<<grid, block, 0, stream>>>(
+          channel, n_intervals, map_size, img_size, reinterpret_cast<const float *>(inputs[0]),
+          reinterpret_cast<const float *>(inputs[1]), reinterpret_cast<const int *>(inputs[2]),
+          reinterpret_cast<const int *>(inputs[3]), reinterpret_cast<const int *>(inputs[4]),
+          reinterpret_cast<const int *>(inputs[5]), reinterpret_cast<const int *>(inputs[6]),
+          reinterpret_cast<float *>(outputs[0]));
+      } else {
+        // printf("bevpool : fp16 fp32\n");
+        bev_pool_v2_kernel<__half, float><<<grid, block, 0, stream>>>(
+          channel, n_intervals, map_size, img_size, reinterpret_cast<const __half *>(inputs[0]),
+          reinterpret_cast<const __half *>(inputs[1]), reinterpret_cast<const int *>(inputs[2]),
+          reinterpret_cast<const int *>(inputs[3]), reinterpret_cast<const int *>(inputs[4]),
+          reinterpret_cast<const int *>(inputs[5]), reinterpret_cast<const int *>(inputs[6]),
+          reinterpret_cast<float *>(outputs[0]));
+      }
+      break;
     case int(DataType::kHALF):
-        if(inputDesc[0].type == DataType::kFLOAT){
-            // printf("bevpool : fp32 fp16\n");
-            bev_pool_v2_kernel<float, __half><<<grid, block, 0, stream>>>(
-                                                        channel, 
-                                                        n_intervals,
-                                                        map_size,
-                                                        img_size,
-                                                        reinterpret_cast<const float *>(inputs[0]),
-                                                        reinterpret_cast<const float *>(inputs[1]),
-                                                        reinterpret_cast<const int *>(inputs[2]),
-                                                        reinterpret_cast<const int *>(inputs[3]),
-                                                        reinterpret_cast<const int *>(inputs[4]),
-                                                        reinterpret_cast<const int *>(inputs[5]),
-                                                        reinterpret_cast<const int *>(inputs[6]),
-                                                        reinterpret_cast<__half *>(outputs[0]));
-        }
-        else{
-            // printf("bevpool : fp16 fp16\n");
-            bev_pool_v2_kernel<__half, __half><<<grid, block, 0, stream>>>(
-                                                        channel, 
-                                                        n_intervals,
-                                                        map_size,
-                                                        img_size,
-                                                        reinterpret_cast<const __half *>(inputs[0]),
-                                                        reinterpret_cast<const __half *>(inputs[1]),
-                                                        reinterpret_cast<const int *>(inputs[2]),
-                                                        reinterpret_cast<const int *>(inputs[3]),
-                                                        reinterpret_cast<const int *>(inputs[4]),
-                                                        reinterpret_cast<const int *>(inputs[5]),
-                                                        reinterpret_cast<const int *>(inputs[6]),
-                                                        reinterpret_cast<__half *>(outputs[0]));
-        }
-        break;
-    default: // should NOT be here
-        printf("\tUnsupport datatype!\n");
-    }
-    return 0;
-}
-
-void BEVPoolPlugin::destroy() noexcept {
-    delete this;
-    return;
+      if (inputDesc[0].type == DataType::kFLOAT) {
+        // printf("bevpool : fp32 fp16\n");
+        bev_pool_v2_kernel<float, __half><<<grid, block, 0, stream>>>(
+          channel, n_intervals, map_size, img_size, reinterpret_cast<const float *>(inputs[0]),
+          reinterpret_cast<const float *>(inputs[1]), reinterpret_cast<const int *>(inputs[2]),
+          reinterpret_cast<const int *>(inputs[3]), reinterpret_cast<const int *>(inputs[4]),
+          reinterpret_cast<const int *>(inputs[5]), reinterpret_cast<const int *>(inputs[6]),
+          reinterpret_cast<__half *>(outputs[0]));
+      } else {
+        // printf("bevpool : fp16 fp16\n");
+        bev_pool_v2_kernel<__half, __half><<<grid, block, 0, stream>>>(
+          channel, n_intervals, map_size, img_size, reinterpret_cast<const __half *>(inputs[0]),
+          reinterpret_cast<const __half *>(inputs[1]), reinterpret_cast<const int *>(inputs[2]),
+          reinterpret_cast<const int *>(inputs[3]), reinterpret_cast<const int *>(inputs[4]),
+          reinterpret_cast<const int *>(inputs[5]), reinterpret_cast<const int *>(inputs[6]),
+          reinterpret_cast<__half *>(outputs[0]));
+      }
+      break;
+    default:  // should NOT be here
+      printf("\tUnsupport datatype!\n");
+  }
+  return 0;
+}
+
+void BEVPoolPlugin::destroy() noexcept
+{
+  delete this;
+  return;
 }
 
-int32_t BEVPoolPlugin::initialize() noexcept {
-    return 0;
+int32_t BEVPoolPlugin::initialize() noexcept
+{
+  return 0;
 }
 
-void BEVPoolPlugin::terminate() noexcept {
-    return;
+void BEVPoolPlugin::terminate() noexcept
+{
+  return;
 }
 
-size_t BEVPoolPlugin::getSerializationSize() const noexcept {
-    return sizeof(m_);
+size_t BEVPoolPlugin::getSerializationSize() const noexcept
+{
+  return sizeof(m_);
 }
 
-void BEVPoolPlugin::serialize(void *buffer) const noexcept {
-    memcpy(buffer, &m_, sizeof(m_));
-    return;
+void BEVPoolPlugin::serialize(void * buffer) const noexcept
+{
+  memcpy(buffer, &m_, sizeof(m_));
+  return;
 }
 
-void BEVPoolPlugin::setPluginNamespace(const char *pluginNamespace) noexcept {
-    namespace_ = pluginNamespace;
-    return;
+void BEVPoolPlugin::setPluginNamespace(const char * pluginNamespace) noexcept
+{
+  namespace_ = pluginNamespace;
+  return;
 }
 
-const char *BEVPoolPlugin::getPluginNamespace() const noexcept {
-    return namespace_.c_str();
+const char * BEVPoolPlugin::getPluginNamespace() const noexcept
+{
+  return namespace_.c_str();
 }
 
-const char *BEVPoolPlugin::getPluginType() const noexcept {
-    return BEVPOOL_PLUGIN_NAME;
+const char * BEVPoolPlugin::getPluginType() const noexcept
+{
+  return BEVPOOL_PLUGIN_NAME;
 }
 
-const char *BEVPoolPlugin::getPluginVersion() const noexcept {
-    return BEVPOOL_PLUGIN_VERSION;
+const char * BEVPoolPlugin::getPluginVersion() const noexcept
+{
+  return BEVPOOL_PLUGIN_VERSION;
 }
 
-void BEVPoolPlugin::attachToContext(cudnnContext *contextCudnn, cublasContext *contextCublas, 
-                                                        IGpuAllocator *gpuAllocator) noexcept {
-    return;
+void BEVPoolPlugin::attachToContext(
+  cudnnContext * contextCudnn, cublasContext * contextCublas, IGpuAllocator * gpuAllocator) noexcept
+{
+  return;
 }
 
-void BEVPoolPlugin::detachFromContext() noexcept {
-    return;
+void BEVPoolPlugin::detachFromContext() noexcept
+{
+  return;
 }
 
 // class BEVPoolPluginCreator
-PluginFieldCollection    BEVPoolPluginCreator::fc_ {};
+PluginFieldCollection BEVPoolPluginCreator::fc_{};
 std::vector<PluginField> BEVPoolPluginCreator::attr_;
 
-BEVPoolPluginCreator::BEVPoolPluginCreator() {
-    attr_.clear();
-    attr_.emplace_back(PluginField("bev_h", nullptr, PluginFieldType::kINT32, 1));
-    attr_.emplace_back(PluginField("bev_w", nullptr, PluginFieldType::kINT32, 1));
-    attr_.emplace_back(PluginField("n", nullptr, PluginFieldType::kINT32, 1));
-
+BEVPoolPluginCreator::BEVPoolPluginCreator()
+{
+  attr_.clear();
+  attr_.emplace_back(PluginField("bev_h", nullptr, PluginFieldType::kINT32, 1));
+  attr_.emplace_back(PluginField("bev_w", nullptr, PluginFieldType::kINT32, 1));
+  attr_.emplace_back(PluginField("n", nullptr, PluginFieldType::kINT32, 1));
 
-    fc_.nbFields = attr_.size();
-    fc_.fields   = attr_.data();
+  fc_.nbFields = attr_.size();
+  fc_.fields = attr_.data();
 }
 
-BEVPoolPluginCreator::~BEVPoolPluginCreator() {
+BEVPoolPluginCreator::~BEVPoolPluginCreator()
+{
 }
 
-
-IPluginV2DynamicExt *BEVPoolPluginCreator::createPlugin(const char *name, 
-                                    const PluginFieldCollection *fc) noexcept {
-    const PluginField *fields = fc->fields;
-
-    int bev_h = -1;
-    int bev_w = -1;
-    int n = -1;
-
-    for (int i = 0; i < fc->nbFields; ++i){
-        if(std::string(fc->fields[i].name) == std::string("bev_h")){
-            bev_h = *reinterpret_cast<const int *>(fc->fields[i].data);
-        }
-        else if(std::string(fc->fields[i].name) == std::string("bev_w")){
-            bev_w = *reinterpret_cast<const int *>(fc->fields[i].data);
-        }
-        else if(std::string(fc->fields[i].name) == std::string("n")){
-            n = *reinterpret_cast<const int *>(fc->fields[i].data);
-        }
+IPluginV2DynamicExt * BEVPoolPluginCreator::createPlugin(
+  const char * name, const PluginFieldCollection * fc) noexcept
+{
+  const PluginField * fields = fc->fields;
+
+  int bev_h = -1;
+  int bev_w = -1;
+  int n = -1;
+
+  for (int i = 0; i < fc->nbFields; ++i) {
+    if (std::string(fc->fields[i].name) == std::string("bev_h")) {
+      bev_h = *reinterpret_cast<const int *>(fc->fields[i].data);
+    } else if (std::string(fc->fields[i].name) == std::string("bev_w")) {
+      bev_w = *reinterpret_cast<const int *>(fc->fields[i].data);
+    } else if (std::string(fc->fields[i].name) == std::string("n")) {
+      n = *reinterpret_cast<const int *>(fc->fields[i].data);
     }
-    BEVPoolPlugin *pObj = new BEVPoolPlugin(name, bev_h, bev_w, n);
-    pObj->setPluginNamespace(namespace_.c_str());
-    return pObj;
+  }
+  BEVPoolPlugin * pObj = new BEVPoolPlugin(name, bev_h, bev_w, n);
+  pObj->setPluginNamespace(namespace_.c_str());
+  return pObj;
 }
 
-IPluginV2DynamicExt *BEVPoolPluginCreator::deserializePlugin(const char *name, 
-                                        const void *serialData, size_t serialLength) noexcept {
-    BEVPoolPlugin *pObj = new BEVPoolPlugin(name, serialData, serialLength);
-    pObj->setPluginNamespace(namespace_.c_str());
-    return pObj;
+IPluginV2DynamicExt * BEVPoolPluginCreator::deserializePlugin(
+  const char * name, const void * serialData, size_t serialLength) noexcept
+{
+  BEVPoolPlugin * pObj = new BEVPoolPlugin(name, serialData, serialLength);
+  pObj->setPluginNamespace(namespace_.c_str());
+  return pObj;
 }
 
-void BEVPoolPluginCreator::setPluginNamespace(const char *pluginNamespace) noexcept {
-    namespace_ = pluginNamespace;
-    return;
+void BEVPoolPluginCreator::setPluginNamespace(const char * pluginNamespace) noexcept
+{
+  namespace_ = pluginNamespace;
+  return;
 }
 
-const char *BEVPoolPluginCreator::getPluginNamespace() const noexcept {
-    return namespace_.c_str();
+const char * BEVPoolPluginCreator::getPluginNamespace() const noexcept
+{
+  return namespace_.c_str();
 }
 
-const char *BEVPoolPluginCreator::getPluginName() const noexcept {
-    return BEVPOOL_PLUGIN_NAME;
+const char * BEVPoolPluginCreator::getPluginName() const noexcept
+{
+  return BEVPOOL_PLUGIN_NAME;
 }
 
-const char *BEVPoolPluginCreator::getPluginVersion() const noexcept {
-    return BEVPOOL_PLUGIN_VERSION;
+const char * BEVPoolPluginCreator::getPluginVersion() const noexcept
+{
+  return BEVPOOL_PLUGIN_VERSION;
 }
 
-const PluginFieldCollection *BEVPoolPluginCreator::getFieldNames() noexcept {
-    return &fc_;
+const PluginFieldCollection * BEVPoolPluginCreator::getFieldNames() noexcept
+{
+  return &fc_;
 }
 
 REGISTER_TENSORRT_PLUGIN(BEVPoolPluginCreator);
 
-} // namespace nvinfer1
+}  // namespace nvinfer1
diff --git a/perception/tensorrt_bevdet/src/cpu_jpegdecoder.cpp b/perception/tensorrt_bevdet/src/cpu_jpegdecoder.cpp
index 2df63c21701fc..1205e6fe71bdd 100644
--- a/perception/tensorrt_bevdet/src/cpu_jpegdecoder.cpp
+++ b/perception/tensorrt_bevdet/src/cpu_jpegdecoder.cpp
@@ -1,73 +1,80 @@
-#include <stddef.h>
-#include <stdio.h>
-#include <jpeglib.h>
-#include <setjmp.h>
-#include <vector>
-#include <cuda_runtime.h>
+#include "cpu_jpegdecoder.hpp"
+
 #include "preprocess.hpp"
 
-#include "cpu_jpegdecoder.hpp"
+#include <cuda_runtime.h>
+#include <jpeglib.h>
+#include <setjmp.h>
+#include <stddef.h>
+#include <stdio.h>
 
+#include <vector>
 
-struct jpeg_error_handler {
-    struct jpeg_error_mgr pub;
-    jmp_buf setjmp_buffer;
+struct jpeg_error_handler
+{
+  struct jpeg_error_mgr pub;
+  jmp_buf setjmp_buffer;
 };
 
-int decode_jpeg(const std::vector<char>& buffer, uchar* output) {
-    struct jpeg_decompress_struct cinfo;
-    struct jpeg_error_handler jerr;
-    cinfo.err = jpeg_std_error(&jerr.pub);
+int decode_jpeg(const std::vector<char> & buffer, uchar * output)
+{
+  struct jpeg_decompress_struct cinfo;
+  struct jpeg_error_handler jerr;
+  cinfo.err = jpeg_std_error(&jerr.pub);
 
-    jerr.pub.error_exit = [](j_common_ptr cinfo) {
-        jpeg_error_handler* myerr = (jpeg_error_handler*)cinfo->err;
-        longjmp(myerr->setjmp_buffer, 1);
-    };
+  jerr.pub.error_exit = [](j_common_ptr cinfo) {
+    jpeg_error_handler * myerr = (jpeg_error_handler *)cinfo->err;
+    longjmp(myerr->setjmp_buffer, 1);
+  };
 
-    if(setjmp(jerr.setjmp_buffer)) {
-        printf("\033[31mFailed to decompress jpeg: %s\033[0m\n", jerr.pub.jpeg_message_table[jerr.pub.msg_code]);
-        jpeg_destroy_decompress(&cinfo);
-        return EXIT_FAILURE ;
-    }
-    jpeg_create_decompress(&cinfo);
+  if (setjmp(jerr.setjmp_buffer)) {
+    printf(
+      "\033[31mFailed to decompress jpeg: %s\033[0m\n",
+      jerr.pub.jpeg_message_table[jerr.pub.msg_code]);
+    jpeg_destroy_decompress(&cinfo);
+    return EXIT_FAILURE;
+  }
+  jpeg_create_decompress(&cinfo);
 
-    if(buffer.size() == 0) {
-        printf("buffer size is 0");
-        return EXIT_FAILURE;
-    }
-    jpeg_mem_src(&cinfo, (uchar*)buffer.data(), buffer.size());
-    if(jpeg_read_header(&cinfo, TRUE) != JPEG_HEADER_OK) {
-        printf("\033[31mFailed to read jpeg header\033[0m\n");
-        return EXIT_FAILURE;
-    }
-    if(jpeg_start_decompress(&cinfo) != TRUE) {
-        printf("\033[31mFailed to start decompress\033[0m\n");
-        return EXIT_FAILURE;
-    }
+  if (buffer.size() == 0) {
+    printf("buffer size is 0");
+    return EXIT_FAILURE;
+  }
+  jpeg_mem_src(&cinfo, (uchar *)buffer.data(), buffer.size());
+  if (jpeg_read_header(&cinfo, TRUE) != JPEG_HEADER_OK) {
+    printf("\033[31mFailed to read jpeg header\033[0m\n");
+    return EXIT_FAILURE;
+  }
+  if (jpeg_start_decompress(&cinfo) != TRUE) {
+    printf("\033[31mFailed to start decompress\033[0m\n");
+    return EXIT_FAILURE;
+  }
 
-    while (cinfo.output_scanline < cinfo.output_height) {
-        auto row = cinfo.output_scanline;
-        uchar* ptr = output + row * cinfo.image_width * 3;
-        jpeg_read_scanlines(&cinfo, (JSAMPARRAY)&ptr, 1);
-    }
-    jpeg_finish_decompress(&cinfo);
-    jpeg_destroy_decompress(&cinfo);
-    return EXIT_SUCCESS ;
+  while (cinfo.output_scanline < cinfo.output_height) {
+    auto row = cinfo.output_scanline;
+    uchar * ptr = output + row * cinfo.image_width * 3;
+    jpeg_read_scanlines(&cinfo, (JSAMPARRAY)&ptr, 1);
+  }
+  jpeg_finish_decompress(&cinfo);
+  jpeg_destroy_decompress(&cinfo);
+  return EXIT_SUCCESS;
 }
 
-int decode_cpu(const std::vector<std::vector<char>> &files_data, uchar* out_imgs, size_t width, size_t height) {
-    uchar* temp = new uchar[width * height * 3];
-    uchar* temp_gpu = nullptr;
-    CHECK_CUDA(cudaMalloc(&temp_gpu, width * height * 3));
-    for (size_t i = 0; i < files_data.size(); i++) {
-        if (decode_jpeg(files_data[i], temp)) {
-            return EXIT_FAILURE;
-        }
-        CHECK_CUDA(cudaMemcpy(temp_gpu, temp, width * height * 3, cudaMemcpyHostToDevice));
-        convert_RGBHWC_to_BGRCHW(temp_gpu, out_imgs + i * width * height * 3, 3, height, width);
-        CHECK_CUDA(cudaDeviceSynchronize());
+int decode_cpu(
+  const std::vector<std::vector<char>> & files_data, uchar * out_imgs, size_t width, size_t height)
+{
+  uchar * temp = new uchar[width * height * 3];
+  uchar * temp_gpu = nullptr;
+  CHECK_CUDA(cudaMalloc(&temp_gpu, width * height * 3));
+  for (size_t i = 0; i < files_data.size(); i++) {
+    if (decode_jpeg(files_data[i], temp)) {
+      return EXIT_FAILURE;
     }
-    CHECK_CUDA(cudaFree(temp_gpu));
-    delete[] temp;
-    return EXIT_SUCCESS;
+    CHECK_CUDA(cudaMemcpy(temp_gpu, temp, width * height * 3, cudaMemcpyHostToDevice));
+    convert_RGBHWC_to_BGRCHW(temp_gpu, out_imgs + i * width * height * 3, 3, height, width);
+    CHECK_CUDA(cudaDeviceSynchronize());
+  }
+  CHECK_CUDA(cudaFree(temp_gpu));
+  delete[] temp;
+  return EXIT_SUCCESS;
 }
diff --git a/perception/tensorrt_bevdet/src/data.cpp b/perception/tensorrt_bevdet/src/data.cpp
index cd9a9de73bfd3..b22ac49d7c209 100644
--- a/perception/tensorrt_bevdet/src/data.cpp
+++ b/perception/tensorrt_bevdet/src/data.cpp
@@ -1,195 +1,187 @@
-#include <cstdio>
-#include <stdlib.h>
-
-#include <iostream>
-#include <fstream>
-#include <chrono>
-
 #include "data.hpp"
-#include "cpu_jpegdecoder.hpp"
-
-using std::chrono::high_resolution_clock;
-using std::chrono::duration;
 
-camParams::camParams(const YAML::Node &config, int n, std::vector<std::string> &cams_name) :
-                                                                    N_img(n){
-    
-    if((size_t)n != cams_name.size()){
-        std::cerr << "Error! Need " << n << " camera param, bug given " << cams_name.size() << " camera names!" << std::endl;
-    }
-    ego2global_rot = fromYamlQuater(config["ego2global_rotation"]);
-    ego2global_trans = fromYamlTrans(config["ego2global_translation"]);
-
-    lidar2ego_rot = fromYamlQuater(config["lidar2ego_rotation"]);
-    lidar2ego_trans = fromYamlTrans(config["lidar2ego_translation"]);
-
-    timestamp = config["timestamp"].as<unsigned long long>();
-    scene_token = config["scene_token"].as<std::string>();
+#include "cpu_jpegdecoder.hpp"
 
-    imgs_file.clear();
+#include <stdlib.h>
 
-    cams_intrin.clear();
-    cams2ego_rot.clear();
-    cams2ego_trans.clear();
-    
-    for(std::string name : cams_name){
-        imgs_file.push_back("." + config["cams"][name]["data_path"].as<std::string>());
+#include <chrono>
+#include <cstdio>
+#include <fstream>
+#include <iostream>
 
-        //
-        cams_intrin.push_back(fromYamlMatrix3f(config["cams"][name]["cam_intrinsic"]));
-        cams2ego_rot.push_back(fromYamlQuater(config["cams"][name]["sensor2ego_rotation"]));
-        cams2ego_trans.push_back(fromYamlTrans(config["cams"][name]["sensor2ego_translation"]));
-        //
+using std::chrono::duration;
+using std::chrono::high_resolution_clock;
 
-    }
+camParams::camParams(const YAML::Node & config, int n, std::vector<std::string> & cams_name)
+: N_img(n)
+{
+  if ((size_t)n != cams_name.size()) {
+    std::cerr << "Error! Need " << n << " camera param, bug given " << cams_name.size()
+              << " camera names!" << std::endl;
+  }
+  ego2global_rot = fromYamlQuater(config["ego2global_rotation"]);
+  ego2global_trans = fromYamlTrans(config["ego2global_translation"]);
+
+  lidar2ego_rot = fromYamlQuater(config["lidar2ego_rotation"]);
+  lidar2ego_trans = fromYamlTrans(config["lidar2ego_translation"]);
+
+  timestamp = config["timestamp"].as<unsigned long long>();
+  scene_token = config["scene_token"].as<std::string>();
+
+  imgs_file.clear();
+
+  cams_intrin.clear();
+  cams2ego_rot.clear();
+  cams2ego_trans.clear();
+
+  for (std::string name : cams_name) {
+    imgs_file.push_back("." + config["cams"][name]["data_path"].as<std::string>());
+
+    //
+    cams_intrin.push_back(fromYamlMatrix3f(config["cams"][name]["cam_intrinsic"]));
+    cams2ego_rot.push_back(fromYamlQuater(config["cams"][name]["sensor2ego_rotation"]));
+    cams2ego_trans.push_back(fromYamlTrans(config["cams"][name]["sensor2ego_translation"]));
+    //
+  }
 }
 
-
-DataLoader::DataLoader(int _n_img, 
-                       int _h, 
-                       int _w,
-                       const std::string &_data_infos_path,
-                       const std::vector<std::string> &_cams_name,
-                       bool _sep):
-                       n_img(_n_img),
-                       cams_intrin(_n_img), 
-                       cams2ego_rot(_n_img),
-                       cams2ego_trans(_n_img),
+DataLoader::DataLoader(
+  int _n_img, int _h, int _w, const std::string & _data_infos_path,
+  const std::vector<std::string> & _cams_name, bool _sep)
+: n_img(_n_img),
+  cams_intrin(_n_img),
+  cams2ego_rot(_n_img),
+  cams2ego_trans(_n_img),
 #ifdef __HAVE_NVJPEG__
-                       nvdecoder(_n_img, DECODE_BGR),  
+  nvdecoder(_n_img, DECODE_BGR),
 #endif
-                       img_h(_h),
-                       img_w(_w),
-                       cams_name(_cams_name),
-                       data_infos_path(_data_infos_path),
-                       separate(_sep) {
-    
-
-    YAML::Node temp_seq = YAML::LoadFile(data_infos_path + "/time_sequence.yaml");
-    time_sequence = temp_seq["time_sequence"].as<std::vector<int>>();
-    sample_num = time_sequence.size();
-
-    cams_param.resize(sample_num);
-
-    if(separate == false){
-        YAML::Node infos = YAML::LoadFile(data_infos_path + "/samples_info.yaml");
-
-        for(size_t i = 0; i < cams_name.size(); i++){
-            cams_intrin[i] = fromYamlMatrix3f(infos[0]["cams"][cams_name[i]]["cam_intrinsic"]);
-            cams2ego_rot[i] = fromYamlQuater(infos[0]["cams"][cams_name[i]]
-                                                            ["sensor2ego_rotation"]);
-            cams2ego_trans[i] = fromYamlTrans(infos[0]["cams"][cams_name[i]]
-                                                            ["sensor2ego_translation"]);
-        }
-        lidar2ego_rot = fromYamlQuater(infos[0]["lidar2ego_rotation"]);
-        lidar2ego_trans = fromYamlTrans(infos[0]["lidar2ego_translation"]);
-
-        for(int i = 0; i < sample_num; i++){
-            cams_param[i] = camParams(infos[i], n_img, cams_name);
-        }
+  img_h(_h),
+  img_w(_w),
+  cams_name(_cams_name),
+  data_infos_path(_data_infos_path),
+  separate(_sep)
+{
+  YAML::Node temp_seq = YAML::LoadFile(data_infos_path + "/time_sequence.yaml");
+  time_sequence = temp_seq["time_sequence"].as<std::vector<int>>();
+  sample_num = time_sequence.size();
+
+  cams_param.resize(sample_num);
+
+  if (separate == false) {
+    YAML::Node infos = YAML::LoadFile(data_infos_path + "/samples_info.yaml");
+
+    for (size_t i = 0; i < cams_name.size(); i++) {
+      cams_intrin[i] = fromYamlMatrix3f(infos[0]["cams"][cams_name[i]]["cam_intrinsic"]);
+      cams2ego_rot[i] = fromYamlQuater(infos[0]["cams"][cams_name[i]]["sensor2ego_rotation"]);
+      cams2ego_trans[i] = fromYamlTrans(infos[0]["cams"][cams_name[i]]["sensor2ego_translation"]);
     }
-    else{
-        YAML::Node config0 = YAML::LoadFile(data_infos_path + "/samples_info/sample0000.yaml");
-
-        for(size_t i = 0; i < cams_name.size(); i++){
-            cams_intrin[i] = fromYamlMatrix3f(config0["cams"][cams_name[i]]["cam_intrinsic"]);
-            cams2ego_rot[i] = fromYamlQuater(config0["cams"][cams_name[i]]
-                                                            ["sensor2ego_rotation"]);
-            cams2ego_trans[i] = fromYamlTrans(config0["cams"][cams_name[i]]
-                                                            ["sensor2ego_translation"]);
-        }
-        lidar2ego_rot = fromYamlQuater(config0["lidar2ego_rotation"]);
-        lidar2ego_trans = fromYamlTrans(config0["lidar2ego_translation"]);
+    lidar2ego_rot = fromYamlQuater(infos[0]["lidar2ego_rotation"]);
+    lidar2ego_trans = fromYamlTrans(infos[0]["lidar2ego_translation"]);
+
+    for (int i = 0; i < sample_num; i++) {
+      cams_param[i] = camParams(infos[i], n_img, cams_name);
     }
+  } else {
+    YAML::Node config0 = YAML::LoadFile(data_infos_path + "/samples_info/sample0000.yaml");
 
-    CHECK_CUDA(cudaMalloc((void**)&imgs_dev, n_img * img_h * img_w * 3 * sizeof(uchar)));
-}
+    for (size_t i = 0; i < cams_name.size(); i++) {
+      cams_intrin[i] = fromYamlMatrix3f(config0["cams"][cams_name[i]]["cam_intrinsic"]);
+      cams2ego_rot[i] = fromYamlQuater(config0["cams"][cams_name[i]]["sensor2ego_rotation"]);
+      cams2ego_trans[i] = fromYamlTrans(config0["cams"][cams_name[i]]["sensor2ego_translation"]);
+    }
+    lidar2ego_rot = fromYamlQuater(config0["lidar2ego_rotation"]);
+    lidar2ego_trans = fromYamlTrans(config0["lidar2ego_translation"]);
+  }
 
+  CHECK_CUDA(cudaMalloc((void **)&imgs_dev, n_img * img_h * img_w * 3 * sizeof(uchar)));
+}
 
-const camsData& DataLoader::data(int idx, bool time_order){
-    if(time_order){
-        idx = time_sequence[idx];
-    }
-    printf("------time_sequence idx : %d ---------\n", idx);
-    if(separate == false){
-        cams_data.param = cams_param[idx];
-    }
-    else{
-        char str_idx[50];
-        sprintf(str_idx, "/samples_info/sample%04d.yaml", idx);
-        YAML::Node config_idx = YAML::LoadFile(data_infos_path + str_idx);
-        cams_data.param = camParams(config_idx, n_img, cams_name);
-    }
-    imgs_data.clear();
-    if(read_sample(cams_data.param.imgs_file, imgs_data)){
-        exit(1);
-    }
+const camsData & DataLoader::data(int idx, bool time_order)
+{
+  if (time_order) {
+    idx = time_sequence[idx];
+  }
+  printf("------time_sequence idx : %d ---------\n", idx);
+  if (separate == false) {
+    cams_data.param = cams_param[idx];
+  } else {
+    char str_idx[50];
+    sprintf(str_idx, "/samples_info/sample%04d.yaml", idx);
+    YAML::Node config_idx = YAML::LoadFile(data_infos_path + str_idx);
+    cams_data.param = camParams(config_idx, n_img, cams_name);
+  }
+  imgs_data.clear();
+  if (read_sample(cams_data.param.imgs_file, imgs_data)) {
+    exit(1);
+  }
 #ifdef __HAVE_NVJPEG__
-    nvdecoder.decode(imgs_data, imgs_dev);
+  nvdecoder.decode(imgs_data, imgs_dev);
 #else
-    decode_cpu(imgs_data, imgs_dev, img_w, img_h);
-    printf("decode on cpu!\n");
+  decode_cpu(imgs_data, imgs_dev, img_w, img_h);
+  printf("decode on cpu!\n");
 #endif
-    cams_data.imgs_dev = imgs_dev;
-    return cams_data;
+  cams_data.imgs_dev = imgs_dev;
+  return cams_data;
 }
 
-DataLoader::~DataLoader(){
-    CHECK_CUDA(cudaFree(imgs_dev));
+DataLoader::~DataLoader()
+{
+  CHECK_CUDA(cudaFree(imgs_dev));
 }
 
-
-int read_image(std::string &image_names, std::vector<char> &raw_data){
-
-    std::ifstream input(image_names.c_str(), std::ios::in | std::ios::binary | std::ios::ate);
-
-    if (!(input.is_open())){
-        std::cerr << "Cannot open image: " << image_names << std::endl;
-        return EXIT_FAILURE;
-    }
-
-    std::streamsize file_size = input.tellg();
-    input.seekg(0, std::ios::beg);
-    if (raw_data.size() < (size_t)file_size){
-        raw_data.resize(file_size);
-    }
-    if (!input.read(raw_data.data(), file_size)){
-        std::cerr << "Cannot read from file: " << image_names << std::endl;
-        return EXIT_FAILURE;
-    }
-    return EXIT_SUCCESS;
+int read_image(std::string & image_names, std::vector<char> & raw_data)
+{
+  std::ifstream input(image_names.c_str(), std::ios::in | std::ios::binary | std::ios::ate);
+
+  if (!(input.is_open())) {
+    std::cerr << "Cannot open image: " << image_names << std::endl;
+    return EXIT_FAILURE;
+  }
+
+  std::streamsize file_size = input.tellg();
+  input.seekg(0, std::ios::beg);
+  if (raw_data.size() < (size_t)file_size) {
+    raw_data.resize(file_size);
+  }
+  if (!input.read(raw_data.data(), file_size)) {
+    std::cerr << "Cannot read from file: " << image_names << std::endl;
+    return EXIT_FAILURE;
+  }
+  return EXIT_SUCCESS;
 }
 
-int read_sample(std::vector<std::string> &imgs_file, std::vector<std::vector<char>> &imgs_data){
+int read_sample(std::vector<std::string> & imgs_file, std::vector<std::vector<char>> & imgs_data)
+{
+  imgs_data.resize(imgs_file.size());
 
-    imgs_data.resize(imgs_file.size());
-
-    for(size_t i = 0; i < imgs_data.size(); i++){
-        if(read_image(imgs_file[i], imgs_data[i])){
-            return EXIT_FAILURE;
-        }
+  for (size_t i = 0; i < imgs_data.size(); i++) {
+    if (read_image(imgs_file[i], imgs_data[i])) {
+      return EXIT_FAILURE;
     }
-    return EXIT_SUCCESS;
+  }
+  return EXIT_SUCCESS;
 }
 
-
-Eigen::Translation3f fromYamlTrans(YAML::Node x){
-    std::vector<float> trans = x.as<std::vector<float>>();
-    return Eigen::Translation3f(trans[0], trans[1], trans[2]);
+Eigen::Translation3f fromYamlTrans(YAML::Node x)
+{
+  std::vector<float> trans = x.as<std::vector<float>>();
+  return Eigen::Translation3f(trans[0], trans[1], trans[2]);
 }
 
-Eigen::Quaternion<float> fromYamlQuater(YAML::Node x){
-    std::vector<float> quater = x.as<std::vector<float>>();
-    return Eigen::Quaternion<float>(quater[0], quater[1], quater[2], quater[3]);
+Eigen::Quaternion<float> fromYamlQuater(YAML::Node x)
+{
+  std::vector<float> quater = x.as<std::vector<float>>();
+  return Eigen::Quaternion<float>(quater[0], quater[1], quater[2], quater[3]);
 }
 
-Eigen::Matrix3f fromYamlMatrix3f(YAML::Node x){
-    std::vector<std::vector<float>> m = x.as<std::vector<std::vector<float>>>();
-    Eigen::Matrix3f mat;
-    for(size_t i = 0; i < m.size(); i++){
-        for(size_t j = 0; j < m[0].size(); j++){
-            mat(i, j) = m[i][j];
-        }
+Eigen::Matrix3f fromYamlMatrix3f(YAML::Node x)
+{
+  std::vector<std::vector<float>> m = x.as<std::vector<std::vector<float>>>();
+  Eigen::Matrix3f mat;
+  for (size_t i = 0; i < m.size(); i++) {
+    for (size_t j = 0; j < m[0].size(); j++) {
+      mat(i, j) = m[i][j];
     }
-    return mat;
+  }
+  return mat;
 }
diff --git a/perception/tensorrt_bevdet/src/gatherbev_plugin.cu b/perception/tensorrt_bevdet/src/gatherbev_plugin.cu
index 16972cf8891bb..6ac903625d093 100644
--- a/perception/tensorrt_bevdet/src/gatherbev_plugin.cu
+++ b/perception/tensorrt_bevdet/src/gatherbev_plugin.cu
@@ -14,8 +14,9 @@
  * limitations under the License.
  */
 
-#include "gatherbev_plugin.hpp"
 #include "common.hpp"
+#include "gatherbev_plugin.hpp"
+
 #include <cuda_fp16.h>
 #include <cuda_runtime_api.h>
 
@@ -23,274 +24,288 @@
 // input[1] == curr_feat  b x 80 x 128 x 128
 // input[2] == flag       b x 1
 // out[0]                 b x (8+1)*80 x 128 x 128
-template<typename T>
-__global__ void copy_feat_kernel(int nthreads, // b * (adj_num + 1) * map_size
-                                int adj_num,
-                                int channel,
-                                int map_size,
-                                const T* adj_feats,
-                                const T* curr_feat,
-                                const int* flag,
-                                T* out_feats){
-    CUDA_1D_KERNEL_LOOP(idx, nthreads){
-        int b = idx / ((adj_num + 1) * map_size);
-        int n = (idx / map_size) % (adj_num + 1);
-        int m = idx % map_size;
-
-        int start = b * (adj_num + 1) * channel * map_size + n * channel * map_size + m;
-        int end = start + channel * map_size;
-        for(int i = start, c = 0; i < end; i += map_size, c++){
-            if(flag[b] == 0 || n == 0){
-                out_feats[i] = curr_feat[b * channel * map_size + c * map_size + m];
-            }
-            else{
-                out_feats[i] = adj_feats[i - channel * map_size];
-            }
-        }
+template <typename T>
+__global__ void copy_feat_kernel(
+  int nthreads,  // b * (adj_num + 1) * map_size
+  int adj_num, int channel, int map_size, const T * adj_feats, const T * curr_feat,
+  const int * flag, T * out_feats)
+{
+  CUDA_1D_KERNEL_LOOP(idx, nthreads)
+  {
+    int b = idx / ((adj_num + 1) * map_size);
+    int n = (idx / map_size) % (adj_num + 1);
+    int m = idx % map_size;
+
+    int start = b * (adj_num + 1) * channel * map_size + n * channel * map_size + m;
+    int end = start + channel * map_size;
+    for (int i = start, c = 0; i < end; i += map_size, c++) {
+      if (flag[b] == 0 || n == 0) {
+        out_feats[i] = curr_feat[b * channel * map_size + c * map_size + m];
+      } else {
+        out_feats[i] = adj_feats[i - channel * map_size];
+      }
     }
+  }
 }
 
-
 namespace nvinfer1
 {
 // class GatherBEVPlugin
-GatherBEVPlugin::GatherBEVPlugin(const std::string &name):
-    name_(name){
+GatherBEVPlugin::GatherBEVPlugin(const std::string & name) : name_(name)
+{
 }
 
-GatherBEVPlugin::GatherBEVPlugin(const std::string &name, const void *buffer, size_t length):
-    name_(name){
-    memcpy(&m_, buffer, sizeof(m_));
+GatherBEVPlugin::GatherBEVPlugin(const std::string & name, const void * buffer, size_t length)
+: name_(name)
+{
+  memcpy(&m_, buffer, sizeof(m_));
 }
 
-GatherBEVPlugin::~GatherBEVPlugin(){
+GatherBEVPlugin::~GatherBEVPlugin()
+{
 }
 
-IPluginV2DynamicExt *GatherBEVPlugin::clone() const noexcept {
-    auto p = new GatherBEVPlugin(name_, &m_, sizeof(m_));
-    p->setPluginNamespace(namespace_.c_str());
-    return p;
+IPluginV2DynamicExt * GatherBEVPlugin::clone() const noexcept
+{
+  auto p = new GatherBEVPlugin(name_, &m_, sizeof(m_));
+  p->setPluginNamespace(namespace_.c_str());
+  return p;
 }
 
-int32_t GatherBEVPlugin::getNbOutputs() const noexcept {
-    return 1;
+int32_t GatherBEVPlugin::getNbOutputs() const noexcept
+{
+  return 1;
 }
- 
-DataType GatherBEVPlugin::getOutputDataType(int32_t index, DataType const *inputTypes, 
-                                                                int32_t nbInputs) const noexcept {
-    return inputTypes[0];  // 与adj_feat一致
+
+DataType GatherBEVPlugin::getOutputDataType(
+  int32_t index, DataType const * inputTypes, int32_t nbInputs) const noexcept
+{
+  return inputTypes[0];  // 与adj_feat一致
 }
 
-DimsExprs GatherBEVPlugin::getOutputDimensions(int32_t outputIndex, const DimsExprs *inputs, 
-                                        int32_t nbInputs, IExprBuilder &exprBuilder) noexcept {
+DimsExprs GatherBEVPlugin::getOutputDimensions(
+  int32_t outputIndex, const DimsExprs * inputs, int32_t nbInputs,
+  IExprBuilder & exprBuilder) noexcept
+{
   // input[0] == adj_feat   b x 8 x 80 x 128 x 128
   // input[1] == curr_feat  b x 80 x 128 x 128
   // input[2] == flag       b x 1
   // out[0]                 b x (8+1)*80 x 128 x 128
-    DimsExprs ret;
-    ret.nbDims = inputs[0].nbDims - 1;
-
-    IDimensionExpr const *n_feat = exprBuilder.operation(DimensionOperation::kSUM, 
-                                                        *inputs[0].d[1],
-                                                        *exprBuilder.constant(1));
-    ret.d[0] = inputs[0].d[0];
-    ret.d[1] = exprBuilder.operation(DimensionOperation::kPROD, *n_feat, *inputs[0].d[2]);
-    ret.d[2] = inputs[0].d[3];
-    ret.d[3] = inputs[0].d[4];
-
-    return ret; 
-}
-
-bool GatherBEVPlugin::supportsFormatCombination(int32_t pos, const PluginTensorDesc *inOut,
-                                                    int32_t nbInputs, int32_t nbOutputs) noexcept {
-    // adj_feat
-    if(pos == 0){
-        return (inOut[pos].type == DataType::kFLOAT || inOut[pos].type == DataType::kHALF) &&
-                inOut[pos].format == TensorFormat::kLINEAR;
-    }
-    else if(pos == 1){ // curr_feat
-        return inOut[0].type == inOut[1].type && inOut[pos].format == TensorFormat::kLINEAR;
-    }
-    else if(pos == 3){ // out
-        return inOut[0].type == inOut[3].type && inOut[pos].format == TensorFormat::kLINEAR;
-    }
-    else if(pos == 2){
-        return inOut[pos].type == DataType::kINT32 && inOut[pos].format == TensorFormat::kLINEAR;
-    }
-    return false;
-}
+  DimsExprs ret;
+  ret.nbDims = inputs[0].nbDims - 1;
+
+  IDimensionExpr const * n_feat =
+    exprBuilder.operation(DimensionOperation::kSUM, *inputs[0].d[1], *exprBuilder.constant(1));
+  ret.d[0] = inputs[0].d[0];
+  ret.d[1] = exprBuilder.operation(DimensionOperation::kPROD, *n_feat, *inputs[0].d[2]);
+  ret.d[2] = inputs[0].d[3];
+  ret.d[3] = inputs[0].d[4];
 
-void GatherBEVPlugin::configurePlugin(const DynamicPluginTensorDesc *in, int32_t nbInputs, 
-                                    const DynamicPluginTensorDesc *out, int32_t nbOutputs) noexcept {
-    return;
+  return ret;
 }
 
-size_t GatherBEVPlugin::getWorkspaceSize(const PluginTensorDesc *inputs, int32_t nbInputs, 
-                                const PluginTensorDesc *outputs, int32_t nbOutputs) const noexcept {
-    return 0;
+bool GatherBEVPlugin::supportsFormatCombination(
+  int32_t pos, const PluginTensorDesc * inOut, int32_t nbInputs, int32_t nbOutputs) noexcept
+{
+  // adj_feat
+  if (pos == 0) {
+    return (inOut[pos].type == DataType::kFLOAT || inOut[pos].type == DataType::kHALF) &&
+           inOut[pos].format == TensorFormat::kLINEAR;
+  } else if (pos == 1) {  // curr_feat
+    return inOut[0].type == inOut[1].type && inOut[pos].format == TensorFormat::kLINEAR;
+  } else if (pos == 3) {  // out
+    return inOut[0].type == inOut[3].type && inOut[pos].format == TensorFormat::kLINEAR;
+  } else if (pos == 2) {
+    return inOut[pos].type == DataType::kINT32 && inOut[pos].format == TensorFormat::kLINEAR;
+  }
+  return false;
+}
+
+void GatherBEVPlugin::configurePlugin(
+  const DynamicPluginTensorDesc * in, int32_t nbInputs, const DynamicPluginTensorDesc * out,
+  int32_t nbOutputs) noexcept
+{
+  return;
 }
 
-int32_t GatherBEVPlugin::enqueue(const PluginTensorDesc *inputDesc, const PluginTensorDesc *outputDesc,
-    const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept {
+size_t GatherBEVPlugin::getWorkspaceSize(
+  const PluginTensorDesc * inputs, int32_t nbInputs, const PluginTensorDesc * outputs,
+  int32_t nbOutputs) const noexcept
+{
+  return 0;
+}
 
+int32_t GatherBEVPlugin::enqueue(
+  const PluginTensorDesc * inputDesc, const PluginTensorDesc * outputDesc,
+  const void * const * inputs, void * const * outputs, void * workspace,
+  cudaStream_t stream) noexcept
+{
   // input[0] == adj_feat   b x 8 x 80 x 128 x 128
   // input[1] == curr_feat  b x 80 x 128 x 128
   // input[2] == flag       b x 1
   // out[0]                 b x (8+1)*80 x 128 x 128
 
-    // int nthreads, // b * (adj_num + 1) * map_size
-    // int adj_num,
-    // int channel,
-    // int map_size,
-
-    // int flag = 0;
-    // CHECK_CUDA(cudaMemcpy(&flag, inputs[2], sizeof(int), cudaMemcpyDeviceToHost));
+  // int nthreads, // b * (adj_num + 1) * map_size
+  // int adj_num,
+  // int channel,
+  // int map_size,
 
-    int b = inputDesc[0].dims.d[0];
-    int adj_num = inputDesc[0].dims.d[1];
-    int map_size = inputDesc[0].dims.d[3] * inputDesc[0].dims.d[4];
-    int channel = inputDesc[0].dims.d[2];
+  // int flag = 0;
+  // CHECK_CUDA(cudaMemcpy(&flag, inputs[2], sizeof(int), cudaMemcpyDeviceToHost));
 
-    int feat_step = inputDesc[1].dims.d[1] * inputDesc[1].dims.d[2] * inputDesc[1].dims.d[3];
+  int b = inputDesc[0].dims.d[0];
+  int adj_num = inputDesc[0].dims.d[1];
+  int map_size = inputDesc[0].dims.d[3] * inputDesc[0].dims.d[4];
+  int channel = inputDesc[0].dims.d[2];
 
-    int nthreads = b * (adj_num + 1) * map_size;
+  int feat_step = inputDesc[1].dims.d[1] * inputDesc[1].dims.d[2] * inputDesc[1].dims.d[3];
 
-    dim3 grid(GET_BLOCKS(nthreads));
-    dim3 block(NUM_THREADS);
-    // printf("GatherBEV input adj_feats %s\n", dataTypeToString(inputDesc[0].type).c_str());
-    // printf("GatherBEV input curr_feat %s\n", dataTypeToString(inputDesc[1].type).c_str());
-    // printf("GatherBEV output bevfeats %s\n", dataTypeToString(outputDesc[0].type).c_str());
+  int nthreads = b * (adj_num + 1) * map_size;
 
+  dim3 grid(GET_BLOCKS(nthreads));
+  dim3 block(NUM_THREADS);
+  // printf("GatherBEV input adj_feats %s\n", dataTypeToString(inputDesc[0].type).c_str());
+  // printf("GatherBEV input curr_feat %s\n", dataTypeToString(inputDesc[1].type).c_str());
+  // printf("GatherBEV output bevfeats %s\n", dataTypeToString(outputDesc[0].type).c_str());
 
-    switch (int(outputDesc[0].type))
-    {
+  switch (int(outputDesc[0].type)) {
     case int(DataType::kFLOAT):
-        // printf("gather : fp32\n");
-        copy_feat_kernel<<<grid, block, 0, stream>>>(nthreads,
-                                                    adj_num,
-                                                    channel,
-                                                    map_size,
-                                                    reinterpret_cast<const float*>(inputs[0]),
-                                                    reinterpret_cast<const float*>(inputs[1]),
-                                                    reinterpret_cast<const int*>(inputs[2]),
-                                                    reinterpret_cast<float*>(outputs[0]));
-        
-
-        break;
-    case int(DataType::kHALF):
-        // printf("gather : fp16\n");
-        copy_feat_kernel<<<grid, block, 0, stream>>>(nthreads,
-                                                    adj_num,
-                                                    channel,
-                                                    map_size,
-                                                    reinterpret_cast<const __half*>(inputs[0]),
-                                                    reinterpret_cast<const __half*>(inputs[1]),
-                                                    reinterpret_cast<const int*>(inputs[2]),
-                                                    reinterpret_cast<__half*>(outputs[0]));
-        break;
-    default: // should NOT be here
-        printf("\tUnsupport datatype!\n");
-    }
-    return 0;
-}
+      // printf("gather : fp32\n");
+      copy_feat_kernel<<<grid, block, 0, stream>>>(
+        nthreads, adj_num, channel, map_size, reinterpret_cast<const float *>(inputs[0]),
+        reinterpret_cast<const float *>(inputs[1]), reinterpret_cast<const int *>(inputs[2]),
+        reinterpret_cast<float *>(outputs[0]));
 
-void GatherBEVPlugin::destroy() noexcept {
-    delete this;
-    return;
+      break;
+    case int(DataType::kHALF):
+      // printf("gather : fp16\n");
+      copy_feat_kernel<<<grid, block, 0, stream>>>(
+        nthreads, adj_num, channel, map_size, reinterpret_cast<const __half *>(inputs[0]),
+        reinterpret_cast<const __half *>(inputs[1]), reinterpret_cast<const int *>(inputs[2]),
+        reinterpret_cast<__half *>(outputs[0]));
+      break;
+    default:  // should NOT be here
+      printf("\tUnsupport datatype!\n");
+  }
+  return 0;
+}
+
+void GatherBEVPlugin::destroy() noexcept
+{
+  delete this;
+  return;
 }
 
-int32_t GatherBEVPlugin::initialize() noexcept {
-    return 0;
+int32_t GatherBEVPlugin::initialize() noexcept
+{
+  return 0;
 }
 
-void GatherBEVPlugin::terminate() noexcept {
-    return;
+void GatherBEVPlugin::terminate() noexcept
+{
+  return;
 }
 
-size_t GatherBEVPlugin::getSerializationSize() const noexcept {
-    return sizeof(m_);
+size_t GatherBEVPlugin::getSerializationSize() const noexcept
+{
+  return sizeof(m_);
 }
 
-void GatherBEVPlugin::serialize(void *buffer) const noexcept {
-    memcpy(buffer, &m_, sizeof(m_));
-    return;
+void GatherBEVPlugin::serialize(void * buffer) const noexcept
+{
+  memcpy(buffer, &m_, sizeof(m_));
+  return;
 }
 
-void GatherBEVPlugin::setPluginNamespace(const char *pluginNamespace) noexcept {
-    namespace_ = pluginNamespace;
-    return;
+void GatherBEVPlugin::setPluginNamespace(const char * pluginNamespace) noexcept
+{
+  namespace_ = pluginNamespace;
+  return;
 }
 
-const char *GatherBEVPlugin::getPluginNamespace() const noexcept {
-    return namespace_.c_str();
+const char * GatherBEVPlugin::getPluginNamespace() const noexcept
+{
+  return namespace_.c_str();
 }
 
-const char *GatherBEVPlugin::getPluginType() const noexcept {
-    return GATHERBEV_PLUGIN_NAME;
+const char * GatherBEVPlugin::getPluginType() const noexcept
+{
+  return GATHERBEV_PLUGIN_NAME;
 }
 
-const char *GatherBEVPlugin::getPluginVersion() const noexcept {
-    return GATHERBEV_PLUGIN_VERSION;
+const char * GatherBEVPlugin::getPluginVersion() const noexcept
+{
+  return GATHERBEV_PLUGIN_VERSION;
 }
 
-void GatherBEVPlugin::attachToContext(cudnnContext *contextCudnn, cublasContext *contextCublas, 
-                                                        IGpuAllocator *gpuAllocator) noexcept {
-    return;
+void GatherBEVPlugin::attachToContext(
+  cudnnContext * contextCudnn, cublasContext * contextCublas, IGpuAllocator * gpuAllocator) noexcept
+{
+  return;
 }
 
-void GatherBEVPlugin::detachFromContext() noexcept {
-    return;
+void GatherBEVPlugin::detachFromContext() noexcept
+{
+  return;
 }
 
 // class GatherBEVPluginCreator
-PluginFieldCollection    GatherBEVPluginCreator::fc_ {};
+PluginFieldCollection GatherBEVPluginCreator::fc_{};
 std::vector<PluginField> GatherBEVPluginCreator::attr_;
 
-GatherBEVPluginCreator::GatherBEVPluginCreator() {
-    attr_.clear();
-    fc_.nbFields = attr_.size();
-    fc_.fields   = attr_.data();
+GatherBEVPluginCreator::GatherBEVPluginCreator()
+{
+  attr_.clear();
+  fc_.nbFields = attr_.size();
+  fc_.fields = attr_.data();
 }
 
-GatherBEVPluginCreator::~GatherBEVPluginCreator() {
+GatherBEVPluginCreator::~GatherBEVPluginCreator()
+{
 }
 
-
-IPluginV2DynamicExt *GatherBEVPluginCreator::createPlugin(const char *name, 
-                                    const PluginFieldCollection *fc) noexcept {
-    GatherBEVPlugin *pObj = new GatherBEVPlugin(name);
-    pObj->setPluginNamespace(namespace_.c_str());
-    return pObj;
+IPluginV2DynamicExt * GatherBEVPluginCreator::createPlugin(
+  const char * name, const PluginFieldCollection * fc) noexcept
+{
+  GatherBEVPlugin * pObj = new GatherBEVPlugin(name);
+  pObj->setPluginNamespace(namespace_.c_str());
+  return pObj;
 }
 
-IPluginV2DynamicExt *GatherBEVPluginCreator::deserializePlugin(const char *name, 
-                                        const void *serialData, size_t serialLength) noexcept {
-    GatherBEVPlugin *pObj = new GatherBEVPlugin(name, serialData, serialLength);
-    pObj->setPluginNamespace(namespace_.c_str());
-    return pObj;
+IPluginV2DynamicExt * GatherBEVPluginCreator::deserializePlugin(
+  const char * name, const void * serialData, size_t serialLength) noexcept
+{
+  GatherBEVPlugin * pObj = new GatherBEVPlugin(name, serialData, serialLength);
+  pObj->setPluginNamespace(namespace_.c_str());
+  return pObj;
 }
 
-void GatherBEVPluginCreator::setPluginNamespace(const char *pluginNamespace) noexcept {
-    namespace_ = pluginNamespace;
-    return;
+void GatherBEVPluginCreator::setPluginNamespace(const char * pluginNamespace) noexcept
+{
+  namespace_ = pluginNamespace;
+  return;
 }
 
-const char *GatherBEVPluginCreator::getPluginNamespace() const noexcept {
-    return namespace_.c_str();
+const char * GatherBEVPluginCreator::getPluginNamespace() const noexcept
+{
+  return namespace_.c_str();
 }
 
-const char *GatherBEVPluginCreator::getPluginName() const noexcept {
-    return GATHERBEV_PLUGIN_NAME;
+const char * GatherBEVPluginCreator::getPluginName() const noexcept
+{
+  return GATHERBEV_PLUGIN_NAME;
 }
 
-const char *GatherBEVPluginCreator::getPluginVersion() const noexcept {
-    return GATHERBEV_PLUGIN_VERSION;
+const char * GatherBEVPluginCreator::getPluginVersion() const noexcept
+{
+  return GATHERBEV_PLUGIN_VERSION;
 }
 
-const PluginFieldCollection *GatherBEVPluginCreator::getFieldNames() noexcept {
-    return &fc_;
+const PluginFieldCollection * GatherBEVPluginCreator::getFieldNames() noexcept
+{
+  return &fc_;
 }
 
 REGISTER_TENSORRT_PLUGIN(GatherBEVPluginCreator);
 
-} // namespace nvinfer1
+}  // namespace nvinfer1
diff --git a/perception/tensorrt_bevdet/src/iou3d_nms.cu b/perception/tensorrt_bevdet/src/iou3d_nms.cu
index ef532e43049f6..df11848598f2c 100644
--- a/perception/tensorrt_bevdet/src/iou3d_nms.cu
+++ b/perception/tensorrt_bevdet/src/iou3d_nms.cu
@@ -6,43 +6,43 @@ All Rights Reserved 2019-2022.
 
 #include "iou3d_nms.hpp"
 
-struct Point {
+struct Point
+{
   float x, y;
   __device__ Point() {}
   __device__ Point(double _x, double _y) { x = _x, y = _y; }
 
-  __device__ void set(float _x, float _y) {
+  __device__ void set(float _x, float _y)
+  {
     x = _x;
     y = _y;
   }
 
-  __device__ Point operator+(const Point& b) const {
-    return Point(x + b.x, y + b.y);
-  }
+  __device__ Point operator+(const Point & b) const { return Point(x + b.x, y + b.y); }
 
-  __device__ Point operator-(const Point& b) const {
-    return Point(x - b.x, y - b.y);
-  }
+  __device__ Point operator-(const Point & b) const { return Point(x - b.x, y - b.y); }
 };
 
-__device__ inline float cross(const Point& a, const Point& b) {
+__device__ inline float cross(const Point & a, const Point & b)
+{
   return a.x * b.y - a.y * b.x;
 }
 
-__device__ inline float cross(const Point& p1, const Point& p2, const Point& p0) {
+__device__ inline float cross(const Point & p1, const Point & p2, const Point & p0)
+{
   return (p1.x - p0.x) * (p2.y - p0.y) - (p2.x - p0.x) * (p1.y - p0.y);
 }
 
-__device__ int check_rect_cross(const Point& p1, const Point& p2,
-                                const Point& q1, const Point& q2) {
-  int ret = min(p1.x, p2.x) <= max(q1.x, q2.x) &&
-            min(q1.x, q2.x) <= max(p1.x, p2.x) &&
-            min(p1.y, p2.y) <= max(q1.y, q2.y) &&
-            min(q1.y, q2.y) <= max(p1.y, p2.y);
+__device__ int check_rect_cross(
+  const Point & p1, const Point & p2, const Point & q1, const Point & q2)
+{
+  int ret = min(p1.x, p2.x) <= max(q1.x, q2.x) && min(q1.x, q2.x) <= max(p1.x, p2.x) &&
+            min(p1.y, p2.y) <= max(q1.y, q2.y) && min(q1.y, q2.y) <= max(p1.y, p2.y);
   return ret;
 }
 
-__device__ inline int check_in_box2d(const float* box, const Point& p) {
+__device__ inline int check_in_box2d(const float * box, const Point & p)
+{
   // params: (7) [x, y, z, dx, dy, dz, heading]
   const float kMargin = 1e-2;
 
@@ -53,13 +53,12 @@ __device__ inline int check_in_box2d(const float* box, const Point& p) {
   float rot_x = (p.x - center_x) * angle_cos + (p.y - center_y) * (-angle_sin);
   float rot_y = (p.x - center_x) * angle_sin + (p.y - center_y) * angle_cos;
 
-  return (fabs(rot_x) < box[3] / 2 + kMargin &&
-          fabs(rot_y) < box[4] / 2 + kMargin);
+  return (fabs(rot_x) < box[3] / 2 + kMargin && fabs(rot_y) < box[4] / 2 + kMargin);
 }
 
-__device__ inline int intersection(const Point& p1, const Point& p0,
-                                   const Point& q1, const Point& q0,
-                                   Point& ans) {
+__device__ inline int intersection(
+  const Point & p1, const Point & p0, const Point & q1, const Point & q0, Point & ans)
+{
   // fast exclusion
   if (check_rect_cross(p0, p1, q0, q1) == 0) return 0;
 
@@ -89,28 +88,27 @@ __device__ inline int intersection(const Point& p1, const Point& p0,
   return 1;
 }
 
-__device__ inline void rotate_around_center(const Point& center,
-                                            const float angle_cos,
-                                            const float angle_sin, 
-                                            Point& p) {
+__device__ inline void rotate_around_center(
+  const Point & center, const float angle_cos, const float angle_sin, Point & p)
+{
   float new_x = (p.x - center.x) * angle_cos + (p.y - center.y) * (-angle_sin) + center.x;
   float new_y = (p.x - center.x) * angle_sin + (p.y - center.y) * angle_cos + center.y;
   p.set(new_x, new_y);
 }
 
-__device__ inline int point_cmp(const Point& a, const Point& b,
-                                const Point& center) {
-  return atan2(a.y - center.y, a.x - center.x) >
-         atan2(b.y - center.y, b.x - center.x);
+__device__ inline int point_cmp(const Point & a, const Point & b, const Point & center)
+{
+  return atan2(a.y - center.y, a.x - center.x) > atan2(b.y - center.y, b.x - center.x);
 }
 
-__device__ inline float box_overlap(const float* box_a, const float* box_b) {
+__device__ inline float box_overlap(const float * box_a, const float * box_b)
+{
   // params box_a: [x, y, z, dx, dy, dz, heading]
   // params box_b: [x, y, z, dx, dy, dz, heading]
 
   float a_angle = box_a[6], b_angle = box_b[6];
-  float a_dx_half = box_a[3] / 2, b_dx_half = box_b[3] / 2,
-        a_dy_half = box_a[4] / 2, b_dy_half = box_b[4] / 2;
+  float a_dx_half = box_a[3] / 2, b_dx_half = box_b[3] / 2, a_dy_half = box_a[4] / 2,
+        b_dy_half = box_b[4] / 2;
   float a_x1 = box_a[0] - a_dx_half, a_y1 = box_a[1] - a_dy_half;
   float a_x2 = box_a[0] + a_dx_half, a_y2 = box_a[1] + a_dy_half;
   float b_x1 = box_b[0] - b_dx_half, b_y1 = box_b[1] - b_dy_half;
@@ -121,10 +119,10 @@ __device__ inline float box_overlap(const float* box_a, const float* box_b) {
 
 #ifdef DEBUG
   printf(
-      "a: (%.3f, %.3f, %.3f, %.3f, %.3f), b: (%.3f, %.3f, %.3f, %.3f, %.3f)\n",
-      a_x1, a_y1, a_x2, a_y2, a_angle, b_x1, b_y1, b_x2, b_y2, b_angle);
-  printf("center a: (%.3f, %.3f), b: (%.3f, %.3f)\n", center_a.x, center_a.y,
-         center_b.x, center_b.y);
+    "a: (%.3f, %.3f, %.3f, %.3f, %.3f), b: (%.3f, %.3f, %.3f, %.3f, %.3f)\n", a_x1, a_y1, a_x2,
+    a_y2, a_angle, b_x1, b_y1, b_x2, b_y2, b_angle);
+  printf(
+    "center a: (%.3f, %.3f), b: (%.3f, %.3f)\n", center_a.x, center_a.y, center_b.x, center_b.y);
 #endif
 
   Point box_a_corners[5];
@@ -145,15 +143,16 @@ __device__ inline float box_overlap(const float* box_a, const float* box_b) {
 
   for (int k = 0; k < 4; k++) {
 #ifdef DEBUG
-    printf("before corner %d: a(%.3f, %.3f), b(%.3f, %.3f) \n", k,
-           box_a_corners[k].x, box_a_corners[k].y, box_b_corners[k].x,
-           box_b_corners[k].y);
+    printf(
+      "before corner %d: a(%.3f, %.3f), b(%.3f, %.3f) \n", k, box_a_corners[k].x,
+      box_a_corners[k].y, box_b_corners[k].x, box_b_corners[k].y);
 #endif
     rotate_around_center(center_a, a_angle_cos, a_angle_sin, box_a_corners[k]);
     rotate_around_center(center_b, b_angle_cos, b_angle_sin, box_b_corners[k]);
 #ifdef DEBUG
-    printf("corner %d: a(%.3f, %.3f), b(%.3f, %.3f) \n", k, box_a_corners[k].x,
-           box_a_corners[k].y, box_b_corners[k].x, box_b_corners[k].y);
+    printf(
+      "corner %d: a(%.3f, %.3f), b(%.3f, %.3f) \n", k, box_a_corners[k].x, box_a_corners[k].y,
+      box_b_corners[k].x, box_b_corners[k].y);
 #endif
   }
 
@@ -168,20 +167,19 @@ __device__ inline float box_overlap(const float* box_a, const float* box_b) {
   poly_center.set(0, 0);
   for (int i = 0; i < 4; i++) {
     for (int j = 0; j < 4; j++) {
-      flag = intersection(box_a_corners[i + 1], box_a_corners[i],
-                          box_b_corners[j + 1], box_b_corners[j],
-                          cross_points[cnt]);
+      flag = intersection(
+        box_a_corners[i + 1], box_a_corners[i], box_b_corners[j + 1], box_b_corners[j],
+        cross_points[cnt]);
       if (flag) {
         poly_center = poly_center + cross_points[cnt];
         cnt++;
 #ifdef DEBUG
         printf(
-            "Cross points (%.3f, %.3f): a(%.3f, %.3f)->(%.3f, %.3f), b(%.3f, "
-            "%.3f)->(%.3f, %.3f) \n",
-            cross_points[cnt - 1].x, cross_points[cnt - 1].y,
-            box_a_corners[i].x, box_a_corners[i].y, box_a_corners[i + 1].x,
-            box_a_corners[i + 1].y, box_b_corners[i].x, box_b_corners[i].y,
-            box_b_corners[i + 1].x, box_b_corners[i + 1].y);
+          "Cross points (%.3f, %.3f): a(%.3f, %.3f)->(%.3f, %.3f), b(%.3f, "
+          "%.3f)->(%.3f, %.3f) \n",
+          cross_points[cnt - 1].x, cross_points[cnt - 1].y, box_a_corners[i].x, box_a_corners[i].y,
+          box_a_corners[i + 1].x, box_a_corners[i + 1].y, box_b_corners[i].x, box_b_corners[i].y,
+          box_b_corners[i + 1].x, box_b_corners[i + 1].y);
 #endif
       }
     }
@@ -194,8 +192,8 @@ __device__ inline float box_overlap(const float* box_a, const float* box_b) {
       cross_points[cnt] = box_b_corners[k];
       cnt++;
 #ifdef DEBUG
-      printf("b corners in a: corner_b(%.3f, %.3f)", cross_points[cnt - 1].x,
-             cross_points[cnt - 1].y);
+      printf(
+        "b corners in a: corner_b(%.3f, %.3f)", cross_points[cnt - 1].x, cross_points[cnt - 1].y);
 #endif
     }
     if (check_in_box2d(box_b, box_a_corners[k])) {
@@ -203,8 +201,8 @@ __device__ inline float box_overlap(const float* box_a, const float* box_b) {
       cross_points[cnt] = box_a_corners[k];
       cnt++;
 #ifdef DEBUG
-      printf("a corners in b: corner_a(%.3f, %.3f)", cross_points[cnt - 1].x,
-             cross_points[cnt - 1].y);
+      printf(
+        "a corners in b: corner_a(%.3f, %.3f)", cross_points[cnt - 1].x, cross_points[cnt - 1].y);
 #endif
     }
   }
@@ -227,22 +225,21 @@ __device__ inline float box_overlap(const float* box_a, const float* box_b) {
 #ifdef DEBUG
   printf("cnt=%d\n", cnt);
   for (int i = 0; i < cnt; i++) {
-    printf("All cross point %d: (%.3f, %.3f)\n", i, cross_points[i].x,
-           cross_points[i].y);
+    printf("All cross point %d: (%.3f, %.3f)\n", i, cross_points[i].x, cross_points[i].y);
   }
 #endif
 
   // get the overlap areas
   float area = 0;
   for (int k = 0; k < cnt - 1; k++) {
-    area += cross(cross_points[k] - cross_points[0],
-                  cross_points[k + 1] - cross_points[0]);
+    area += cross(cross_points[k] - cross_points[0], cross_points[k + 1] - cross_points[0]);
   }
 
   return fabs(area) / 2.0;
 }
 
-__device__ inline float iou_bev(const float* box_a, const float* box_b) {
+__device__ inline float iou_bev(const float * box_a, const float * box_b)
+{
   // params box_a: [x, y, z, dx, dy, dz, heading]
   // params box_b: [x, y, z, dx, dy, dz, heading]
   float sa = box_a[3] * box_a[4];
@@ -251,7 +248,8 @@ __device__ inline float iou_bev(const float* box_a, const float* box_b) {
   return s_overlap / fmaxf(sa + sb - s_overlap, EPS);
 }
 
-__device__ inline float iou_normal(float const* const a, float const* const b) {
+__device__ inline float iou_normal(float const * const a, float const * const b)
+{
   // params: a: [x, y, z, dx, dy, dz, heading]
   // params: b: [x, y, z, dx, dy, dz, heading]
   float left = fmaxf(a[0] - a[3] / 2, b[0] - b[3] / 2),
@@ -265,9 +263,10 @@ __device__ inline float iou_normal(float const* const a, float const* const b) {
   return interS / fmaxf(Sa + Sb - interS, EPS);
 }
 
-__global__ void boxes_overlap_kernel(const int num_a, const float* boxes_a,
-                                     const int num_b, const float* boxes_b,
-                                     float* ans_overlap) {
+__global__ void boxes_overlap_kernel(
+  const int num_a, const float * boxes_a, const int num_b, const float * boxes_b,
+  float * ans_overlap)
+{
   // params boxes_a: (N, 7) [x, y, z, dx, dy, dz, heading]
   // params boxes_b: (M, 7) [x, y, z, dx, dy, dz, heading]
   const int a_idx = blockIdx.y * THREADS_PER_BLOCK + threadIdx.y;
@@ -276,15 +275,15 @@ __global__ void boxes_overlap_kernel(const int num_a, const float* boxes_a,
   if (a_idx >= num_a || b_idx >= num_b) {
     return;
   }
-  const float* cur_box_a = boxes_a + a_idx * kBoxBlockSize;
-  const float* cur_box_b = boxes_b + b_idx * kBoxBlockSize;
+  const float * cur_box_a = boxes_a + a_idx * kBoxBlockSize;
+  const float * cur_box_b = boxes_b + b_idx * kBoxBlockSize;
   float s_overlap = box_overlap(cur_box_a, cur_box_b);
   ans_overlap[a_idx * num_b + b_idx] = s_overlap;
 }
 
-__global__ void boxes_iou_bev_kernel(const int num_a, const float* boxes_a,
-                                     const int num_b, const float* boxes_b,
-                                     float* ans_iou) {
+__global__ void boxes_iou_bev_kernel(
+  const int num_a, const float * boxes_a, const int num_b, const float * boxes_b, float * ans_iou)
+{
   // params boxes_a: (N, 7) [x, y, z, dx, dy, dz, heading]
   // params boxes_b: (M, 7) [x, y, z, dx, dy, dz, heading]
   const int a_idx = blockIdx.y * THREADS_PER_BLOCK + threadIdx.y;
@@ -294,16 +293,16 @@ __global__ void boxes_iou_bev_kernel(const int num_a, const float* boxes_a,
     return;
   }
 
-  const float* cur_box_a = boxes_a + a_idx * kBoxBlockSize;
-  const float* cur_box_b = boxes_b + b_idx * kBoxBlockSize;
+  const float * cur_box_a = boxes_a + a_idx * kBoxBlockSize;
+  const float * cur_box_b = boxes_b + b_idx * kBoxBlockSize;
   float cur_iou_bev = iou_bev(cur_box_a, cur_box_b);
   ans_iou[a_idx * num_b + b_idx] = cur_iou_bev;
 }
 
-__global__ void RotatedNmsKernel(const int boxes_num,
-                                 const float nms_overlap_thresh,
-                                 const float* boxes, 
-                                 unsigned long long* mask) {
+__global__ void RotatedNmsKernel(
+  const int boxes_num, const float nms_overlap_thresh, const float * boxes,
+  unsigned long long * mask)
+{
   // params: boxes (N, 7) [x, y, z, dx, dy, dz, heading]
   // params: mask (N, N/THREADS_PER_BLOCK_NMS)
 
@@ -318,19 +317,26 @@ __global__ void RotatedNmsKernel(const int boxes_num,
   __shared__ float block_boxes[THREADS_PER_BLOCK_NMS * kBoxBlockSize];
 
   if (threadIdx.x < col_size) {
-    block_boxes[threadIdx.x * kBoxBlockSize + 0] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 0];
-    block_boxes[threadIdx.x * kBoxBlockSize + 1] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 1];
-    block_boxes[threadIdx.x * kBoxBlockSize + 2] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 2];
-    block_boxes[threadIdx.x * kBoxBlockSize + 3] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 3];
-    block_boxes[threadIdx.x * kBoxBlockSize + 4] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 4];
-    block_boxes[threadIdx.x * kBoxBlockSize + 5] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 5];
-    block_boxes[threadIdx.x * kBoxBlockSize + 6] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 6];
+    block_boxes[threadIdx.x * kBoxBlockSize + 0] =
+      boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 0];
+    block_boxes[threadIdx.x * kBoxBlockSize + 1] =
+      boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 1];
+    block_boxes[threadIdx.x * kBoxBlockSize + 2] =
+      boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 2];
+    block_boxes[threadIdx.x * kBoxBlockSize + 3] =
+      boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 3];
+    block_boxes[threadIdx.x * kBoxBlockSize + 4] =
+      boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 4];
+    block_boxes[threadIdx.x * kBoxBlockSize + 5] =
+      boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 5];
+    block_boxes[threadIdx.x * kBoxBlockSize + 6] =
+      boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 6];
   }
   __syncthreads();
 
   if (threadIdx.x < row_size) {
     const int cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x;
-    const float* cur_box = boxes + cur_box_idx * kBoxBlockSize;
+    const float * cur_box = boxes + cur_box_idx * kBoxBlockSize;
 
     int i = 0;
     unsigned long long t = 0;
@@ -348,10 +354,10 @@ __global__ void RotatedNmsKernel(const int boxes_num,
   }
 }
 
-__global__ void NmsKernel(const int boxes_num,
-                          const float nms_overlap_thresh,
-                          const float* boxes,
-                          unsigned long long* mask) {
+__global__ void NmsKernel(
+  const int boxes_num, const float nms_overlap_thresh, const float * boxes,
+  unsigned long long * mask)
+{
   // params: boxes (N, 7) [x, y, z, dx, dy, dz, heading]
   // params: mask (N, N/THREADS_PER_BLOCK_NMS)
 
@@ -366,19 +372,26 @@ __global__ void NmsKernel(const int boxes_num,
   __shared__ float block_boxes[THREADS_PER_BLOCK_NMS * kBoxBlockSize];
 
   if (threadIdx.x < col_size) {
-    block_boxes[threadIdx.x * kBoxBlockSize + 0] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 0];
-    block_boxes[threadIdx.x * kBoxBlockSize + 1] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 1];
-    block_boxes[threadIdx.x * kBoxBlockSize + 2] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 2];
-    block_boxes[threadIdx.x * kBoxBlockSize + 3] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 3];
-    block_boxes[threadIdx.x * kBoxBlockSize + 4] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 4];
-    block_boxes[threadIdx.x * kBoxBlockSize + 5] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 5];
-    block_boxes[threadIdx.x * kBoxBlockSize + 6] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 6];
+    block_boxes[threadIdx.x * kBoxBlockSize + 0] =
+      boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 0];
+    block_boxes[threadIdx.x * kBoxBlockSize + 1] =
+      boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 1];
+    block_boxes[threadIdx.x * kBoxBlockSize + 2] =
+      boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 2];
+    block_boxes[threadIdx.x * kBoxBlockSize + 3] =
+      boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 3];
+    block_boxes[threadIdx.x * kBoxBlockSize + 4] =
+      boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 4];
+    block_boxes[threadIdx.x * kBoxBlockSize + 5] =
+      boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 5];
+    block_boxes[threadIdx.x * kBoxBlockSize + 6] =
+      boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 6];
   }
   __syncthreads();
 
   if (threadIdx.x < row_size) {
     const int cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x;
-    const float* cur_box = boxes + cur_box_idx * kBoxBlockSize;
+    const float * cur_box = boxes + cur_box_idx * kBoxBlockSize;
 
     int i = 0;
     unsigned long long t = 0;
@@ -396,11 +409,10 @@ __global__ void NmsKernel(const int boxes_num,
   }
 }
 
-__global__ void RotatedNmsWithIndicesKernel(const int boxes_num,
-                                            const float nms_overlap_thresh,
-                                            const float* res_box,
-                                            const int* res_sorted_indices,
-                                            unsigned long long* mask) {
+__global__ void RotatedNmsWithIndicesKernel(
+  const int boxes_num, const float nms_overlap_thresh, const float * res_box,
+  const int * res_sorted_indices, unsigned long long * mask)
+{
   const int row_start = blockIdx.y;
   const int col_start = blockIdx.x;
 
@@ -409,7 +421,7 @@ __global__ void RotatedNmsWithIndicesKernel(const int boxes_num,
 
   __shared__ float block_boxes[THREADS_PER_BLOCK_NMS * kBoxBlockSize];
 
-  if (threadIdx.x < col_size) { // 419, 420有两种情况，分情况讨论
+  if (threadIdx.x < col_size) {  // 419, 420有两种情况，分情况讨论
     const int col_actual_idx = res_sorted_indices[THREADS_PER_BLOCK_NMS * col_start + threadIdx.x];
     block_boxes[threadIdx.x * kBoxBlockSize + 0] = res_box[col_actual_idx * kBoxBlockSize + 0];
     block_boxes[threadIdx.x * kBoxBlockSize + 1] = res_box[col_actual_idx * kBoxBlockSize + 1];
@@ -419,9 +431,9 @@ __global__ void RotatedNmsWithIndicesKernel(const int boxes_num,
     block_boxes[threadIdx.x * kBoxBlockSize + 5] = res_box[col_actual_idx * kBoxBlockSize + 5];
     block_boxes[threadIdx.x * kBoxBlockSize + 6] = res_box[col_actual_idx * kBoxBlockSize + 6];
   }
-  __syncthreads(); // 等待块内的线程
+  __syncthreads();  // 等待块内的线程
 
-  if (threadIdx.x < row_size) { //
+  if (threadIdx.x < row_size) {  //
     const int cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x;
     const int row_actual_idx = res_sorted_indices[cur_box_idx];
 
@@ -439,7 +451,7 @@ __global__ void RotatedNmsWithIndicesKernel(const int boxes_num,
     int start = 0;
     if (row_start == col_start) {
       start = threadIdx.x + 1;  // 此时不和 [0 ~ threadIdx.x) 的比吗? FIXME
-    }                           // 认为这里是正确的，都不需要和小于自己序号的比，如果col小于row,也不需要比
+    }  // 认为这里是正确的，都不需要和小于自己序号的比，如果col小于row,也不需要比
     for (i = start; i < col_size; i++) {
       if (iou_bev(cur_box, block_boxes + i * kBoxBlockSize) > nms_overlap_thresh) {
         t |= 1ULL << i;
@@ -450,131 +462,135 @@ __global__ void RotatedNmsWithIndicesKernel(const int boxes_num,
     // assume cur_box_idx = 21, col_start = 0, row_start = 0 , threadIdx = 21,
     // mark 21 th box and top 64 boxes
     mask[cur_box_idx * col_blocks + col_start] = t;
-    // 也可以 col_start * boxes_num + cur_box_idx 
+    // 也可以 col_start * boxes_num + cur_box_idx
   }
 }
 
-__global__ void box_assign_kernel(float* reg, float* height, float* dim,
-                                  float* rot, float* boxes, float* score,
-                                  int* label, float* out_score, int* out_label,
-                                  int* validIndexs, int head_x_size,
-                                  int head_y_size) {
+__global__ void box_assign_kernel(
+  float * reg, float * height, float * dim, float * rot, float * boxes, float * score, int * label,
+  float * out_score, int * out_label, int * validIndexs, int head_x_size, int head_y_size)
+{
   int boxId = blockIdx.x;
   int channel = threadIdx.x;
   int idx = validIndexs[boxId];
-  if (channel == 0) { boxes[boxId * kBoxBlockSize + 0] = reg[idx]; } 
-  else if (channel == 1) { boxes[boxId * kBoxBlockSize + 1] = reg[idx + head_x_size * head_y_size]; } 
-  else if (channel == 2) { boxes[boxId * kBoxBlockSize + 2] = height[idx]; } 
-  else if (channel == 3) { boxes[boxId * kBoxBlockSize + 3] = dim[idx]; } 
-  else if (channel == 4) { boxes[boxId * kBoxBlockSize + 4] = dim[idx + head_x_size * head_y_size]; } 
-  else if (channel == 5) { boxes[boxId * kBoxBlockSize + 5] = dim[idx + 2 * head_x_size * head_y_size]; } 
-  else if (channel == 6) {
-    float theta = atan2f(rot[0 * head_x_size * head_y_size + idx],
-                         rot[1 * head_x_size * head_y_size + idx]);
+  if (channel == 0) {
+    boxes[boxId * kBoxBlockSize + 0] = reg[idx];
+  } else if (channel == 1) {
+    boxes[boxId * kBoxBlockSize + 1] = reg[idx + head_x_size * head_y_size];
+  } else if (channel == 2) {
+    boxes[boxId * kBoxBlockSize + 2] = height[idx];
+  } else if (channel == 3) {
+    boxes[boxId * kBoxBlockSize + 3] = dim[idx];
+  } else if (channel == 4) {
+    boxes[boxId * kBoxBlockSize + 4] = dim[idx + head_x_size * head_y_size];
+  } else if (channel == 5) {
+    boxes[boxId * kBoxBlockSize + 5] = dim[idx + 2 * head_x_size * head_y_size];
+  } else if (channel == 6) {
+    float theta =
+      atan2f(rot[0 * head_x_size * head_y_size + idx], rot[1 * head_x_size * head_y_size + idx]);
     theta = -theta - 3.1415926 / 2;
     boxes[boxId * kBoxBlockSize + 6] = theta;
   }
   // else if(channel == 7)
   // out_score[boxId] = score[idx];
-  else if (channel == 8) { out_label[boxId] = label[idx]; }
+  else if (channel == 8) {
+    out_label[boxId] = label[idx];
+  }
 }
 
-void box_assign_launcher(float* reg, float* height, float* dim, float* rot,
-                         float* boxes, float* score, int* label,
-                         float* out_score, int* out_label, int* validIndexs,
-                         int boxSize, int head_x_size, int head_y_size) {
-  box_assign_kernel<<<boxSize, 9>>>(reg, height, dim, rot, boxes, score, label,
-                                    out_score, out_label, validIndexs, head_x_size,
-                                    head_y_size);
+void box_assign_launcher(
+  float * reg, float * height, float * dim, float * rot, float * boxes, float * score, int * label,
+  float * out_score, int * out_label, int * validIndexs, int boxSize, int head_x_size,
+  int head_y_size)
+{
+  box_assign_kernel<<<boxSize, 9>>>(
+    reg, height, dim, rot, boxes, score, label, out_score, out_label, validIndexs, head_x_size,
+    head_y_size);
 }
 
-__global__ void index_assign(int* indexs) {
+__global__ void index_assign(int * indexs)
+{
   int yIdx = blockIdx.x;
   int xIdx = threadIdx.x;
   int idx = yIdx * blockDim.x + xIdx;
   indexs[idx] = idx;
 }
 
-void index_assign_launcher(int* indexs, int head_x_size, int head_y_size) {
+void index_assign_launcher(int * indexs, int head_x_size, int head_y_size)
+{
   index_assign<<<head_x_size, head_y_size>>>(indexs);
 }
 
-void boxes_overlap_launcher(const int num_a, const float* boxes_a,
-                            const int num_b, const float* boxes_b,
-                            float* ans_overlap) {
+void boxes_overlap_launcher(
+  const int num_a, const float * boxes_a, const int num_b, const float * boxes_b,
+  float * ans_overlap)
+{
   dim3 blocks(
-      DIVUP(num_b, THREADS_PER_BLOCK),
-      DIVUP(num_a, THREADS_PER_BLOCK));  // blockIdx.x(col), blockIdx.y(row)
+    DIVUP(num_b, THREADS_PER_BLOCK),
+    DIVUP(num_a, THREADS_PER_BLOCK));  // blockIdx.x(col), blockIdx.y(row)
   dim3 threads(THREADS_PER_BLOCK, THREADS_PER_BLOCK);
 
-  boxes_overlap_kernel<<<blocks, threads>>>(num_a, boxes_a, num_b, boxes_b,
-                                            ans_overlap);
+  boxes_overlap_kernel<<<blocks, threads>>>(num_a, boxes_a, num_b, boxes_b, ans_overlap);
 #ifdef DEBUG
   cudaDeviceSynchronize();  // for using printf in kernel function
 #endif
 }
 
-void boxes_iou_bev_launcher(const int num_a, const float* boxes_a,
-                            const int num_b, const float* boxes_b,
-                            float* ans_iou) {
+void boxes_iou_bev_launcher(
+  const int num_a, const float * boxes_a, const int num_b, const float * boxes_b, float * ans_iou)
+{
   dim3 blocks(
-      DIVUP(num_b, THREADS_PER_BLOCK),
-      DIVUP(num_a, THREADS_PER_BLOCK));  // blockIdx.x(col), blockIdx.y(row)
+    DIVUP(num_b, THREADS_PER_BLOCK),
+    DIVUP(num_a, THREADS_PER_BLOCK));  // blockIdx.x(col), blockIdx.y(row)
   dim3 threads(THREADS_PER_BLOCK, THREADS_PER_BLOCK);
 
-  boxes_iou_bev_kernel<<<blocks, threads>>>(num_a, boxes_a, num_b, boxes_b,
-                                            ans_iou);
+  boxes_iou_bev_kernel<<<blocks, threads>>>(num_a, boxes_a, num_b, boxes_b, ans_iou);
 #ifdef DEBUG
   cudaDeviceSynchronize();  // for using printf in kernel function
 #endif
 }
 
-void nms_launcher(const float* boxes, unsigned long long* mask, int boxes_num,
-                  float nms_overlap_thresh) {
-  dim3 blocks(DIVUP(boxes_num, THREADS_PER_BLOCK_NMS),
-              DIVUP(boxes_num, THREADS_PER_BLOCK_NMS));
+void nms_launcher(
+  const float * boxes, unsigned long long * mask, int boxes_num, float nms_overlap_thresh)
+{
+  dim3 blocks(DIVUP(boxes_num, THREADS_PER_BLOCK_NMS), DIVUP(boxes_num, THREADS_PER_BLOCK_NMS));
   dim3 threads(THREADS_PER_BLOCK_NMS);
 
   RotatedNmsKernel<<<blocks, threads>>>(boxes_num, nms_overlap_thresh, boxes, mask);
 }
 
-void nms_normal_launcher(const float* boxes, unsigned long long* mask,
-                         int boxes_num, float nms_overlap_thresh) {
-  dim3 blocks(DIVUP(boxes_num, THREADS_PER_BLOCK_NMS),
-              DIVUP(boxes_num, THREADS_PER_BLOCK_NMS));
+void nms_normal_launcher(
+  const float * boxes, unsigned long long * mask, int boxes_num, float nms_overlap_thresh)
+{
+  dim3 blocks(DIVUP(boxes_num, THREADS_PER_BLOCK_NMS), DIVUP(boxes_num, THREADS_PER_BLOCK_NMS));
   dim3 threads(THREADS_PER_BLOCK_NMS);
-  NmsKernel<<<blocks, threads>>>(boxes_num, nms_overlap_thresh, boxes,
-                                         mask);
+  NmsKernel<<<blocks, threads>>>(boxes_num, nms_overlap_thresh, boxes, mask);
 }
 
+Iou3dNmsCuda::Iou3dNmsCuda(
+  const int head_x_size, const int head_y_size, const float nms_overlap_thresh)
+: head_x_size_(head_x_size), head_y_size_(head_y_size), nms_overlap_thresh_(nms_overlap_thresh)
+{
+}
 
-Iou3dNmsCuda::Iou3dNmsCuda(const int head_x_size, 
-                           const int head_y_size, 
-                           const float nms_overlap_thresh)
-    : head_x_size_(head_x_size),
-      head_y_size_(head_y_size),
-      nms_overlap_thresh_(nms_overlap_thresh) {}
-
-int Iou3dNmsCuda::DoIou3dNms(const int box_num_pre,
-                             const float* res_box,
-                             const int* res_sorted_indices,
-                             long* host_keep_data) {
-  const int col_blocks = DIVUP(box_num_pre, THREADS_PER_BLOCK_NMS);      // 划分多少块
+int Iou3dNmsCuda::DoIou3dNms(
+  const int box_num_pre, const float * res_box, const int * res_sorted_indices,
+  long * host_keep_data)
+{
+  const int col_blocks = DIVUP(box_num_pre, THREADS_PER_BLOCK_NMS);  // 划分多少块
   // printf("box_num_pre=%d, col_blocks=%d\n", box_num_pre, col_blocks);
-  unsigned long long* dev_mask = NULL;
-  cudaMalloc((void**)&dev_mask, box_num_pre * col_blocks * sizeof(unsigned long long));  // 
+  unsigned long long * dev_mask = NULL;
+  cudaMalloc((void **)&dev_mask, box_num_pre * col_blocks * sizeof(unsigned long long));  //
   // THREADS_PER_BLOCK_NMS 掩码的长度
-  dim3 blocks(DIVUP(box_num_pre, THREADS_PER_BLOCK_NMS),
-              DIVUP(box_num_pre, THREADS_PER_BLOCK_NMS));
+  dim3 blocks(DIVUP(box_num_pre, THREADS_PER_BLOCK_NMS), DIVUP(box_num_pre, THREADS_PER_BLOCK_NMS));
   dim3 threads(THREADS_PER_BLOCK_NMS);
-  RotatedNmsWithIndicesKernel<<<blocks, threads>>>(box_num_pre,
-                                                   nms_overlap_thresh_,
-                                                   res_box,
-                                                   res_sorted_indices,
-                                                   dev_mask);
+  RotatedNmsWithIndicesKernel<<<blocks, threads>>>(
+    box_num_pre, nms_overlap_thresh_, res_box, res_sorted_indices, dev_mask);
 
   unsigned long long host_mask[box_num_pre * col_blocks];
-  cudaMemcpy(host_mask, dev_mask, box_num_pre * col_blocks * sizeof(unsigned long long), cudaMemcpyDeviceToHost);
+  cudaMemcpy(
+    host_mask, dev_mask, box_num_pre * col_blocks * sizeof(unsigned long long),
+    cudaMemcpyDeviceToHost);
   cudaFree(dev_mask);
 
   unsigned long long host_remv[col_blocks];
@@ -583,9 +599,9 @@ int Iou3dNmsCuda::DoIou3dNms(const int box_num_pre,
   for (int i = 0; i < box_num_pre; i++) {
     int nblock = i / THREADS_PER_BLOCK_NMS;
     int inblock = i % THREADS_PER_BLOCK_NMS;
-    if (!(host_remv[nblock] & (1ULL << inblock))) { // 满足此要求说明和前面选中的所有box都不冲突
-      host_keep_data[num_to_keep++] = i;           // 把满足的box加入
-      for (int j = nblock; j < col_blocks; j++) { // 把和序号i的box，overlap超过阈值的都考虑上
+    if (!(host_remv[nblock] & (1ULL << inblock))) {  // 满足此要求说明和前面选中的所有box都不冲突
+      host_keep_data[num_to_keep++] = i;  // 把满足的box加入
+      for (int j = nblock; j < col_blocks; j++) {  // 把和序号i的box，overlap超过阈值的都考虑上
         host_remv[j] |= host_mask[i * col_blocks + j];
       }
     }
diff --git a/perception/tensorrt_bevdet/src/nvjpegdecoder.cpp b/perception/tensorrt_bevdet/src/nvjpegdecoder.cpp
index 2d889e8539cb8..f8987f9529965 100644
--- a/perception/tensorrt_bevdet/src/nvjpegdecoder.cpp
+++ b/perception/tensorrt_bevdet/src/nvjpegdecoder.cpp
@@ -1,268 +1,284 @@
 #ifdef __HAVE_NVJPEG__
 
-#include <chrono>
 #include "nvjpegdecoder.hpp"
 
-using std::chrono::high_resolution_clock;
-using std::chrono::duration;
-
-int dev_malloc(void **p, size_t s) { return (int)cudaMalloc(p, s); }
-
-int dev_free(void *p) { return (int)cudaFree(p); }
+#include <chrono>
 
-int host_malloc(void **p, size_t s, unsigned int f) { return (int)cudaHostAlloc(p, s, f); }
+using std::chrono::duration;
+using std::chrono::high_resolution_clock;
 
-int host_free(void *p) { return (int)cudaFreeHost(p); }
+int dev_malloc(void ** p, size_t s)
+{
+  return (int)cudaMalloc(p, s);
+}
 
+int dev_free(void * p)
+{
+  return (int)cudaFree(p);
+}
 
-int prepare_buffers(const std::vector<std::vector<char>> &file_data, 
-                    std::vector<int> &img_width, std::vector<int> &img_height,
-                    std::vector<nvjpegImage_t> &ibuf, std::vector<nvjpegImage_t> &isz, 
-                    share_params &share_param) {
-    int widths[NVJPEG_MAX_COMPONENT];
-    int heights[NVJPEG_MAX_COMPONENT];
-    int channels;
-    nvjpegChromaSubsampling_t subsampling;
+int host_malloc(void ** p, size_t s, unsigned int f)
+{
+  return (int)cudaHostAlloc(p, s, f);
+}
 
-    for (size_t i = 0; i < file_data.size(); i++) {
-        CHECK_NVJPEG(nvjpegGetImageInfo(
-            share_param.nvjpeg_handle, (uchar *)file_data[i].data(), file_data[i].size(),
-            &channels, &subsampling, widths, heights));
+int host_free(void * p)
+{
+  return (int)cudaFreeHost(p);
+}
 
-        int mul = 1;
-        // in the case of interleaved RGB output, write only to single channel, but
-        // 3 samples at once
-        if (share_param.fmt == NVJPEG_OUTPUT_RGBI || share_param.fmt == NVJPEG_OUTPUT_BGRI) {
-            channels = 1;
-            mul = 3;
-        }
-        // in the case of rgb create 3 buffers with sizes of original image
-        else if (share_param.fmt == NVJPEG_OUTPUT_RGB || share_param.fmt == NVJPEG_OUTPUT_BGR) {
-            channels = 3;
-            widths[1] = widths[2] = widths[0];
-            heights[1] = heights[2] = heights[0];
-        }
+int prepare_buffers(
+  const std::vector<std::vector<char>> & file_data, std::vector<int> & img_width,
+  std::vector<int> & img_height, std::vector<nvjpegImage_t> & ibuf,
+  std::vector<nvjpegImage_t> & isz, share_params & share_param)
+{
+  int widths[NVJPEG_MAX_COMPONENT];
+  int heights[NVJPEG_MAX_COMPONENT];
+  int channels;
+  nvjpegChromaSubsampling_t subsampling;
+
+  for (size_t i = 0; i < file_data.size(); i++) {
+    CHECK_NVJPEG(nvjpegGetImageInfo(
+      share_param.nvjpeg_handle, (uchar *)file_data[i].data(), file_data[i].size(), &channels,
+      &subsampling, widths, heights));
+
+    int mul = 1;
+    // in the case of interleaved RGB output, write only to single channel, but
+    // 3 samples at once
+    if (share_param.fmt == NVJPEG_OUTPUT_RGBI || share_param.fmt == NVJPEG_OUTPUT_BGRI) {
+      channels = 1;
+      mul = 3;
+    }
+    // in the case of rgb create 3 buffers with sizes of original image
+    else if (share_param.fmt == NVJPEG_OUTPUT_RGB || share_param.fmt == NVJPEG_OUTPUT_BGR) {
+      channels = 3;
+      widths[1] = widths[2] = widths[0];
+      heights[1] = heights[2] = heights[0];
+    }
 
-        if(img_width[i] != widths[0] || img_height[i] != heights[0]){
-            img_width[i] = widths[0];
-            img_height[i] = heights[0];
-            // realloc output buffer if required
-            for (int c = 0; c < channels; c++) {
-                int aw = mul * widths[c];
-                int ah = heights[c];
-                size_t sz = aw * ah;
-                ibuf[i].pitch[c] = aw;
-                if (sz > isz[i].pitch[c]) {
-                    if (ibuf[i].channel[c]) {
-                        CHECK_CUDA(cudaFree(ibuf[i].channel[c]));
-                    }
-                    CHECK_CUDA(cudaMalloc((void **)&ibuf[i].channel[c], sz));
-                    isz[i].pitch[c] = sz;
-                }
-            }
+    if (img_width[i] != widths[0] || img_height[i] != heights[0]) {
+      img_width[i] = widths[0];
+      img_height[i] = heights[0];
+      // realloc output buffer if required
+      for (int c = 0; c < channels; c++) {
+        int aw = mul * widths[c];
+        int ah = heights[c];
+        size_t sz = aw * ah;
+        ibuf[i].pitch[c] = aw;
+        if (sz > isz[i].pitch[c]) {
+          if (ibuf[i].channel[c]) {
+            CHECK_CUDA(cudaFree(ibuf[i].channel[c]));
+          }
+          CHECK_CUDA(cudaMalloc((void **)&ibuf[i].channel[c], sz));
+          isz[i].pitch[c] = sz;
         }
+      }
     }
-    return EXIT_SUCCESS;
+  }
+  return EXIT_SUCCESS;
 }
 
-void create_decoupled_api_handles(std::vector<decode_params_t> &params, share_params &share_param){
-
-    for(size_t i = 0; i < params.size(); i++){
-        CHECK_NVJPEG(nvjpegDecoderCreate(share_param.nvjpeg_handle, NVJPEG_BACKEND_DEFAULT, &params[i].nvjpeg_decoder));
-        CHECK_NVJPEG(nvjpegDecoderStateCreate(share_param.nvjpeg_handle, params[i].nvjpeg_decoder, &params[i].nvjpeg_decoupled_state));
-
-        CHECK_NVJPEG(nvjpegBufferPinnedCreate(share_param.nvjpeg_handle, NULL, &params[i].pinned_buffers[0]));
-        CHECK_NVJPEG(nvjpegBufferPinnedCreate(share_param.nvjpeg_handle, NULL, &params[i].pinned_buffers[1]));
-        CHECK_NVJPEG(nvjpegBufferDeviceCreate(share_param.nvjpeg_handle, NULL, &params[i].device_buffer));
-
-        CHECK_NVJPEG(nvjpegJpegStreamCreate(share_param.nvjpeg_handle, &params[i].jpeg_streams[0]));
-        CHECK_NVJPEG(nvjpegJpegStreamCreate(share_param.nvjpeg_handle, &params[i].jpeg_streams[1]));
-
-
-        CHECK_NVJPEG(nvjpegDecodeParamsCreate(share_param.nvjpeg_handle, &params[i].nvjpeg_decode_params));
-    }
+void create_decoupled_api_handles(std::vector<decode_params_t> & params, share_params & share_param)
+{
+  for (size_t i = 0; i < params.size(); i++) {
+    CHECK_NVJPEG(nvjpegDecoderCreate(
+      share_param.nvjpeg_handle, NVJPEG_BACKEND_DEFAULT, &params[i].nvjpeg_decoder));
+    CHECK_NVJPEG(nvjpegDecoderStateCreate(
+      share_param.nvjpeg_handle, params[i].nvjpeg_decoder, &params[i].nvjpeg_decoupled_state));
+
+    CHECK_NVJPEG(
+      nvjpegBufferPinnedCreate(share_param.nvjpeg_handle, NULL, &params[i].pinned_buffers[0]));
+    CHECK_NVJPEG(
+      nvjpegBufferPinnedCreate(share_param.nvjpeg_handle, NULL, &params[i].pinned_buffers[1]));
+    CHECK_NVJPEG(
+      nvjpegBufferDeviceCreate(share_param.nvjpeg_handle, NULL, &params[i].device_buffer));
+
+    CHECK_NVJPEG(nvjpegJpegStreamCreate(share_param.nvjpeg_handle, &params[i].jpeg_streams[0]));
+    CHECK_NVJPEG(nvjpegJpegStreamCreate(share_param.nvjpeg_handle, &params[i].jpeg_streams[1]));
+
+    CHECK_NVJPEG(
+      nvjpegDecodeParamsCreate(share_param.nvjpeg_handle, &params[i].nvjpeg_decode_params));
+  }
 }
 
-void destroy_decoupled_api_handles(std::vector<decode_params_t> &params, share_params &share_param)
+void destroy_decoupled_api_handles(
+  std::vector<decode_params_t> & params, share_params & share_param)
 {
-    for(size_t i = 0; i < params.size(); i++){
-        CHECK_NVJPEG(nvjpegDecodeParamsDestroy(params[i].nvjpeg_decode_params));
-
-        CHECK_NVJPEG(nvjpegJpegStreamDestroy(params[i].jpeg_streams[0]));
-        CHECK_NVJPEG(nvjpegJpegStreamDestroy(params[i].jpeg_streams[1]));
-        CHECK_NVJPEG(nvjpegBufferPinnedDestroy(params[i].pinned_buffers[0]));
-        CHECK_NVJPEG(nvjpegBufferPinnedDestroy(params[i].pinned_buffers[1]));
-        CHECK_NVJPEG(nvjpegBufferDeviceDestroy(params[i].device_buffer));
-
-        CHECK_NVJPEG(nvjpegJpegStateDestroy(params[i].nvjpeg_decoupled_state));
-        CHECK_NVJPEG(nvjpegDecoderDestroy(params[i].nvjpeg_decoder));
-    }
+  for (size_t i = 0; i < params.size(); i++) {
+    CHECK_NVJPEG(nvjpegDecodeParamsDestroy(params[i].nvjpeg_decode_params));
+
+    CHECK_NVJPEG(nvjpegJpegStreamDestroy(params[i].jpeg_streams[0]));
+    CHECK_NVJPEG(nvjpegJpegStreamDestroy(params[i].jpeg_streams[1]));
+    CHECK_NVJPEG(nvjpegBufferPinnedDestroy(params[i].pinned_buffers[0]));
+    CHECK_NVJPEG(nvjpegBufferPinnedDestroy(params[i].pinned_buffers[1]));
+    CHECK_NVJPEG(nvjpegBufferDeviceDestroy(params[i].device_buffer));
+
+    CHECK_NVJPEG(nvjpegJpegStateDestroy(params[i].nvjpeg_decoupled_state));
+    CHECK_NVJPEG(nvjpegDecoderDestroy(params[i].nvjpeg_decoder));
+  }
 }
 
-void release_buffers(std::vector<nvjpegImage_t> &ibuf){   
-    for(size_t i = 0; i < ibuf.size(); i++){
-        for (size_t c = 0; c < NVJPEG_MAX_COMPONENT; c++)
-            if (ibuf[i].channel[c])
-                CHECK_CUDA(cudaFree(ibuf[i].channel[c]));
-    }
+void release_buffers(std::vector<nvjpegImage_t> & ibuf)
+{
+  for (size_t i = 0; i < ibuf.size(); i++) {
+    for (size_t c = 0; c < NVJPEG_MAX_COMPONENT; c++)
+      if (ibuf[i].channel[c]) CHECK_CUDA(cudaFree(ibuf[i].channel[c]));
+  }
 }
 
-int get_img(const uchar *d_chanR, int pitchR, 
-            const uchar *d_chanG, int pitchG,
-            const uchar *d_chanB, int pitchB, 
-            size_t width, size_t height, uchar* img){
-    
-    size_t step = width * height;
-
-    CHECK_CUDA(cudaMemcpy2D(img + 0 * step, (size_t)width, d_chanR, (size_t)pitchR,
-                               width, height, cudaMemcpyDeviceToDevice));
-    CHECK_CUDA(cudaMemcpy2D(img + 1 * step, (size_t)width, d_chanG, (size_t)pitchG,
-                               width, height, cudaMemcpyDeviceToDevice));
-    CHECK_CUDA(cudaMemcpy2D(img + 2 * step, (size_t)width, d_chanB, (size_t)pitchB,
-                               width, height, cudaMemcpyDeviceToDevice));
-
-    return EXIT_SUCCESS;
+int get_img(
+  const uchar * d_chanR, int pitchR, const uchar * d_chanG, int pitchG, const uchar * d_chanB,
+  int pitchB, size_t width, size_t height, uchar * img)
+{
+  size_t step = width * height;
+
+  CHECK_CUDA(cudaMemcpy2D(
+    img + 0 * step, (size_t)width, d_chanR, (size_t)pitchR, width, height,
+    cudaMemcpyDeviceToDevice));
+  CHECK_CUDA(cudaMemcpy2D(
+    img + 1 * step, (size_t)width, d_chanG, (size_t)pitchG, width, height,
+    cudaMemcpyDeviceToDevice));
+  CHECK_CUDA(cudaMemcpy2D(
+    img + 2 * step, (size_t)width, d_chanB, (size_t)pitchB, width, height,
+    cudaMemcpyDeviceToDevice));
+
+  return EXIT_SUCCESS;
 }
 
+void decode_single_image(
+  const std::vector<char> & img_data, nvjpegImage_t & out, decode_params_t & params,
+  share_params & share_param, double & time)
+{
+  CHECK_CUDA(cudaStreamSynchronize(params.stream));
+  cudaEvent_t startEvent = NULL, stopEvent = NULL;
+  float loopTime = 0;
+  // cudaEventBlockingSync
+  CHECK_CUDA(cudaEventCreate(&startEvent));
+  CHECK_CUDA(cudaEventCreate(&stopEvent));
 
+  CHECK_CUDA(cudaEventRecord(startEvent, params.stream));
 
-void decode_single_image(const std::vector<char> &img_data, nvjpegImage_t &out, decode_params_t &params, 
-                                                                share_params &share_param, double &time){
-
-    CHECK_CUDA(cudaStreamSynchronize(params.stream));
-    cudaEvent_t startEvent = NULL, stopEvent = NULL;
-    float loopTime = 0;
-    // cudaEventBlockingSync
-    CHECK_CUDA(cudaEventCreate(&startEvent));
-    CHECK_CUDA(cudaEventCreate(&stopEvent));
-
-    CHECK_CUDA(cudaEventRecord(startEvent, params.stream));
-
+  CHECK_NVJPEG(nvjpegStateAttachDeviceBuffer(params.nvjpeg_decoupled_state, params.device_buffer));
+  int buffer_index = 0;
+  CHECK_NVJPEG(nvjpegDecodeParamsSetOutputFormat(params.nvjpeg_decode_params, share_param.fmt));
 
-    CHECK_NVJPEG(nvjpegStateAttachDeviceBuffer(params.nvjpeg_decoupled_state, params.device_buffer));
-    int buffer_index = 0;
-    CHECK_NVJPEG(nvjpegDecodeParamsSetOutputFormat(params.nvjpeg_decode_params, share_param.fmt));
+  CHECK_NVJPEG(nvjpegJpegStreamParse(
+    share_param.nvjpeg_handle, (const uchar *)img_data.data(), img_data.size(), 0, 0,
+    params.jpeg_streams[buffer_index]));
 
-    CHECK_NVJPEG(nvjpegJpegStreamParse(share_param.nvjpeg_handle, 
-                (const uchar *)img_data.data(),  img_data.size(), 0, 0, 
-                params.jpeg_streams[buffer_index]));
+  CHECK_NVJPEG(nvjpegStateAttachPinnedBuffer(
+    params.nvjpeg_decoupled_state, params.pinned_buffers[buffer_index]));
 
-    CHECK_NVJPEG(nvjpegStateAttachPinnedBuffer(params.nvjpeg_decoupled_state,
-                                                params.pinned_buffers[buffer_index]));
+  CHECK_NVJPEG(nvjpegDecodeJpegHost(
+    share_param.nvjpeg_handle, params.nvjpeg_decoder, params.nvjpeg_decoupled_state,
+    params.nvjpeg_decode_params, params.jpeg_streams[buffer_index]));
 
-    CHECK_NVJPEG(nvjpegDecodeJpegHost(share_param.nvjpeg_handle, params.nvjpeg_decoder, 
-                params.nvjpeg_decoupled_state,  params.nvjpeg_decode_params, 
-                 params.jpeg_streams[buffer_index]));
+  CHECK_CUDA(cudaStreamSynchronize(params.stream));
 
-    CHECK_CUDA(cudaStreamSynchronize(params.stream));
+  CHECK_NVJPEG(nvjpegDecodeJpegTransferToDevice(
+    share_param.nvjpeg_handle, params.nvjpeg_decoder, params.nvjpeg_decoupled_state,
+    params.jpeg_streams[buffer_index], params.stream));
 
-    CHECK_NVJPEG(nvjpegDecodeJpegTransferToDevice(share_param.nvjpeg_handle, 
-                    params.nvjpeg_decoder, params.nvjpeg_decoupled_state,
-                    params.jpeg_streams[buffer_index], params.stream));
+  buffer_index = 1 - buffer_index;  // switch pinned buffer in pipeline mode to avoid an extra sync
 
-    buffer_index = 1 - buffer_index; // switch pinned buffer in pipeline mode to avoid an extra sync
+  CHECK_NVJPEG(nvjpegDecodeJpegDevice(
+    share_param.nvjpeg_handle, params.nvjpeg_decoder, params.nvjpeg_decoupled_state, &out,
+    params.stream));
 
-    CHECK_NVJPEG(nvjpegDecodeJpegDevice(share_param.nvjpeg_handle, params.nvjpeg_decoder, 
-                    params.nvjpeg_decoupled_state, &out, params.stream));
-    
-    
-    // CHECK_CUDA(cudaStreamSynchronize(params.stream)); //TODO new add
+  // CHECK_CUDA(cudaStreamSynchronize(params.stream)); //TODO new add
 
-    CHECK_CUDA(cudaEventRecord(stopEvent, params.stream));
+  CHECK_CUDA(cudaEventRecord(stopEvent, params.stream));
 
-    CHECK_CUDA(cudaEventSynchronize(stopEvent));
-    CHECK_CUDA(cudaEventElapsedTime(&loopTime, startEvent, stopEvent));
-    time = 0.001 * static_cast<double>(loopTime); // cudaEventElapsedTime returns milliseconds
-    // time = 1.0;
+  CHECK_CUDA(cudaEventSynchronize(stopEvent));
+  CHECK_CUDA(cudaEventElapsedTime(&loopTime, startEvent, stopEvent));
+  time = 0.001 * static_cast<double>(loopTime);  // cudaEventElapsedTime returns milliseconds
+                                                 // time = 1.0;
 }
 
-
-nvjpegDecoder::nvjpegDecoder(size_t n, size_t _fmt) : N_img(n), iout(n), isz(n), widths(n), heights(n), params(n), share_param(_fmt){
-    init();
+nvjpegDecoder::nvjpegDecoder(size_t n, size_t _fmt)
+: N_img(n), iout(n), isz(n), widths(n), heights(n), params(n), share_param(_fmt)
+{
+  init();
 }
 
-
-int nvjpegDecoder::decode(const std::vector<std::vector<char>> &files_data, uchar* out_imgs){    
-
-    for(size_t i = 0; i < params.size(); i++){
-        CHECK_CUDA(cudaStreamCreate(&params[i].stream));
-    }
-    if (prepare_buffers(files_data, widths, heights, iout, isz, share_param))
-        return EXIT_FAILURE;
-
-    double total_time = 0;
-    double times[6];
-
-    auto decode_start = high_resolution_clock::now();
-    std::vector<std::thread> threads(files_data.size());
-    for(size_t i = 0; i < files_data.size(); i++){
-        threads[i] = std::thread(decode_single_image, std::ref(files_data[i]), std::ref(iout[i]), std::ref(params[i]), std::ref(share_param), std::ref(times[i]));
-    }
-    for(size_t i = 0; i < files_data.size(); i++){
-        threads[i].join();
-        total_time += times[i];
-    }
-    auto decode_end = high_resolution_clock::now();
-    duration<double> decode_time = decode_end - decode_start;
-    
-    printf("Decode total time : %.4lf ms\n", decode_time.count() * 1000);
-
-    for(size_t i = 0; i < files_data.size(); i++){
-        get_img(iout[i].channel[0], iout[i].pitch[0],
-                iout[i].channel[1], iout[i].pitch[1], 
-                iout[i].channel[2], iout[i].pitch[2], 
-                widths[i], heights[i], out_imgs + i * 3 * widths[i] * heights[i]);
-    }
-
-    
-    for(size_t i = 0; i < params.size(); i++){
-        CHECK_CUDA(cudaStreamDestroy(params[i].stream));
-    }
-    return EXIT_SUCCESS;
+int nvjpegDecoder::decode(const std::vector<std::vector<char>> & files_data, uchar * out_imgs)
+{
+  for (size_t i = 0; i < params.size(); i++) {
+    CHECK_CUDA(cudaStreamCreate(&params[i].stream));
+  }
+  if (prepare_buffers(files_data, widths, heights, iout, isz, share_param)) return EXIT_FAILURE;
+
+  double total_time = 0;
+  double times[6];
+
+  auto decode_start = high_resolution_clock::now();
+  std::vector<std::thread> threads(files_data.size());
+  for (size_t i = 0; i < files_data.size(); i++) {
+    threads[i] = std::thread(
+      decode_single_image, std::ref(files_data[i]), std::ref(iout[i]), std::ref(params[i]),
+      std::ref(share_param), std::ref(times[i]));
+  }
+  for (size_t i = 0; i < files_data.size(); i++) {
+    threads[i].join();
+    total_time += times[i];
+  }
+  auto decode_end = high_resolution_clock::now();
+  duration<double> decode_time = decode_end - decode_start;
+
+  printf("Decode total time : %.4lf ms\n", decode_time.count() * 1000);
+
+  for (size_t i = 0; i < files_data.size(); i++) {
+    get_img(
+      iout[i].channel[0], iout[i].pitch[0], iout[i].channel[1], iout[i].pitch[1],
+      iout[i].channel[2], iout[i].pitch[2], widths[i], heights[i],
+      out_imgs + i * 3 * widths[i] * heights[i]);
+  }
+
+  for (size_t i = 0; i < params.size(); i++) {
+    CHECK_CUDA(cudaStreamDestroy(params[i].stream));
+  }
+  return EXIT_SUCCESS;
 }
 
-
-
-int nvjpegDecoder::init(){
-  
-    nvjpegDevAllocator_t dev_allocator = {&dev_malloc, &dev_free};
-    nvjpegPinnedAllocator_t pinned_allocator = {&host_malloc, &host_free};
-
-    nvjpegStatus_t status = nvjpegCreateEx(NVJPEG_BACKEND_HARDWARE, &dev_allocator,
-                                           &pinned_allocator, NVJPEG_FLAGS_DEFAULT, 
-                                           &share_param.nvjpeg_handle); 
-    share_param.hw_decode_available = true;
-    if (status == NVJPEG_STATUS_ARCH_MISMATCH){
-        std::cout << "Hardware Decoder not supported. Falling back to default backend" << std::endl;
-        CHECK_NVJPEG(nvjpegCreateEx(NVJPEG_BACKEND_DEFAULT, &dev_allocator, &pinned_allocator, 
-                                    NVJPEG_FLAGS_DEFAULT, &share_param.nvjpeg_handle));
-        share_param.hw_decode_available = false;
-    }
-    else{
-        CHECK_NVJPEG(status);
-    }
-
-    CHECK_NVJPEG(nvjpegJpegStateCreate(share_param.nvjpeg_handle, &share_param.nvjpeg_state));
-
-    create_decoupled_api_handles(params, share_param);
-
-    for(size_t i = 0; i < iout.size(); i++){
-        for (size_t c = 0; c < NVJPEG_MAX_COMPONENT; c++){
-            iout[i].channel[c] = NULL;
-            iout[i].pitch[c] = 0;
-            isz[i].pitch[c] = 0;
-        }
+int nvjpegDecoder::init()
+{
+  nvjpegDevAllocator_t dev_allocator = {&dev_malloc, &dev_free};
+  nvjpegPinnedAllocator_t pinned_allocator = {&host_malloc, &host_free};
+
+  nvjpegStatus_t status = nvjpegCreateEx(
+    NVJPEG_BACKEND_HARDWARE, &dev_allocator, &pinned_allocator, NVJPEG_FLAGS_DEFAULT,
+    &share_param.nvjpeg_handle);
+  share_param.hw_decode_available = true;
+  if (status == NVJPEG_STATUS_ARCH_MISMATCH) {
+    std::cout << "Hardware Decoder not supported. Falling back to default backend" << std::endl;
+    CHECK_NVJPEG(nvjpegCreateEx(
+      NVJPEG_BACKEND_DEFAULT, &dev_allocator, &pinned_allocator, NVJPEG_FLAGS_DEFAULT,
+      &share_param.nvjpeg_handle));
+    share_param.hw_decode_available = false;
+  } else {
+    CHECK_NVJPEG(status);
+  }
+
+  CHECK_NVJPEG(nvjpegJpegStateCreate(share_param.nvjpeg_handle, &share_param.nvjpeg_state));
+
+  create_decoupled_api_handles(params, share_param);
+
+  for (size_t i = 0; i < iout.size(); i++) {
+    for (size_t c = 0; c < NVJPEG_MAX_COMPONENT; c++) {
+      iout[i].channel[c] = NULL;
+      iout[i].pitch[c] = 0;
+      isz[i].pitch[c] = 0;
     }
-    return EXIT_SUCCESS;
+  }
+  return EXIT_SUCCESS;
 }
 
-
-nvjpegDecoder::~nvjpegDecoder(){ 
-    release_buffers(iout);
-    destroy_decoupled_api_handles(params, share_param);
-    CHECK_NVJPEG(nvjpegJpegStateDestroy(share_param.nvjpeg_state));
-    CHECK_NVJPEG(nvjpegDestroy(share_param.nvjpeg_handle));
+nvjpegDecoder::~nvjpegDecoder()
+{
+  release_buffers(iout);
+  destroy_decoupled_api_handles(params, share_param);
+  CHECK_NVJPEG(nvjpegJpegStateDestroy(share_param.nvjpeg_state));
+  CHECK_NVJPEG(nvjpegDestroy(share_param.nvjpeg_handle));
 }
 
-
-#endif
\ No newline at end of file
+#endif
diff --git a/perception/tensorrt_bevdet/src/postprocess.cu b/perception/tensorrt_bevdet/src/postprocess.cu
index d2be6b01dfbe8..30cf19c764c02 100644
--- a/perception/tensorrt_bevdet/src/postprocess.cu
+++ b/perception/tensorrt_bevdet/src/postprocess.cu
@@ -1,223 +1,196 @@
+#include "postprocess.hpp"
+
+#include <thrust/device_vector.h>
+#include <thrust/host_vector.h>
 #include <thrust/sequence.h>
 #include <thrust/sort.h>
 #include <thrust/transform.h>
-#include <thrust/host_vector.h>
-#include <thrust/device_vector.h>
-
-#include "postprocess.hpp"
 
-
-__device__ float sigmoid_gpu(const float x) { return 1.0f / (1.0f + expf(-x)); }
-
-__global__ void BEVDecodeObjectKernel(const int map_size,         // 40000
-                                   const float score_thresh,   // 0.1
-                                //    const int nms_pre_max_size, // 4096
-                                   const float x_start,
-                                   const float y_start,
-                                   const float x_step,
-                                   const float y_step,
-                                   const int output_h,
-                                   const int output_w,
-                                   const int downsample_size,
-                                   const int num_class_in_task,
-                                   const int cls_range,
-                                   const float* reg,
-                                   const float* hei,
-                                   const float* dim,
-                                   const float* rot,
-                                   const float* vel,
-                                   const float* cls,
-                                   float* res_box,
-                                   float* res_conf,
-                                   int* res_cls,
-                                   int* res_box_num,
-                                   float* rescale_factor){  // 根据置信度，初筛，筛选后有res_box_num个box，不超过nms_pre_max_size 4096
-    int idx = threadIdx.x + blockDim.x * blockIdx.x;
-    if (idx >= map_size) return;
-
-    float max_score = cls[idx]; // 初始化为task的第一个类
-    int label = cls_range;      // 初始化为task的第一个类
-    for (int i = 1; i < num_class_in_task; ++i) {
-        float cur_score = cls[idx + i * map_size];
-        if (cur_score > max_score){
-            max_score = cur_score;
-            label = i + cls_range;
-        }
-    }
-
-    int coor_x = idx % output_h;  //
-    int coor_y = idx / output_w;  //
-
-    float conf = sigmoid_gpu(max_score); // 计算置信度
-    if (conf > score_thresh){
-        int cur_valid_box_id = atomicAdd(res_box_num, 1);
-        res_box[cur_valid_box_id * kBoxBlockSize + 0] = 
-            (reg[idx + 0 * map_size] + coor_x) * x_step + x_start;
-        res_box[cur_valid_box_id * kBoxBlockSize + 1] = 
-            (reg[idx + 1 * map_size] + coor_y) * y_step + y_start;
-        res_box[cur_valid_box_id * kBoxBlockSize + 2] = hei[idx];
-        res_box[cur_valid_box_id * kBoxBlockSize + 3] = 
-                                expf(dim[idx + 0 * map_size]) * rescale_factor[label]; // nms scale
-        res_box[cur_valid_box_id * kBoxBlockSize + 4] = 
-                                expf(dim[idx + 1 * map_size]) * rescale_factor[label];
-        res_box[cur_valid_box_id * kBoxBlockSize + 5] = 
-                                expf(dim[idx + 2 * map_size]) * rescale_factor[label];
-        res_box[cur_valid_box_id * kBoxBlockSize + 6] = atan2f(rot[idx], rot[idx + map_size]);
-        res_box[cur_valid_box_id * kBoxBlockSize + 7] = vel[idx];
-        res_box[cur_valid_box_id * kBoxBlockSize + 8] = vel[idx + map_size];
-
-
-        res_conf[cur_valid_box_id] = conf;
-        res_cls[cur_valid_box_id] = label;
-    }
+__device__ float sigmoid_gpu(const float x)
+{
+  return 1.0f / (1.0f + expf(-x));
 }
 
-PostprocessGPU::PostprocessGPU(const int _class_num, 
-                               const float _score_thresh,
-                               const float _nms_thresh, 
-                               const int _nms_pre_maxnum,
-                               const int _nms_post_maxnum, 
-                               const int _down_sample, 
-                               const int _output_h, 
-                               const int _output_w, 
-                               const float _x_step, 
-                               const float _y_step,
-                               const float _x_start, 
-                               const float _y_start,
-                               const std::vector<int>& _class_num_pre_task,
-                               const std::vector<float>& _nms_rescale_factor) :
-                               class_num(_class_num),
-                               score_thresh(_score_thresh),
-                               nms_thresh(_nms_thresh), 
-                               nms_pre_maxnum(_nms_pre_maxnum),
-                               nms_post_maxnum(_nms_post_maxnum), 
-                               down_sample(_down_sample),
-                               output_h(_output_h), 
-                               output_w(_output_w), 
-                               x_step(_x_step),
-                               y_step(_y_step), 
-                               x_start(_x_start), 
-                               y_start(_y_start),
-                               map_size(output_h * output_w),
-                               class_num_pre_task(_class_num_pre_task),
-                               nms_rescale_factor(_nms_rescale_factor),
-                               task_num(_class_num_pre_task.size()){
-
-    CHECK_CUDA(cudaMalloc((void**)&boxes_dev, kBoxBlockSize * map_size * sizeof(float)));
-    CHECK_CUDA(cudaMalloc((void**)&score_dev, map_size * sizeof(float)));
-    CHECK_CUDA(cudaMalloc((void**)&cls_dev, map_size * sizeof(int)));
-    CHECK_CUDA(cudaMalloc((void**)&sorted_indices_dev, map_size * sizeof(int)));
-    CHECK_CUDA(cudaMalloc((void**)&valid_box_num, sizeof(int)));
-    CHECK_CUDA(cudaMalloc((void**)&nms_rescale_factor_dev, class_num * sizeof(float)));
-
-    CHECK_CUDA(cudaMallocHost((void**)&boxes_host, kBoxBlockSize * map_size * sizeof(float)));
-    CHECK_CUDA(cudaMallocHost((void**)&score_host, nms_pre_maxnum * sizeof(float)));
-    CHECK_CUDA(cudaMallocHost((void**)&cls_host, map_size * sizeof(float)));
-    CHECK_CUDA(cudaMallocHost((void**)&sorted_indices_host, nms_pre_maxnum * sizeof(int)));
-    CHECK_CUDA(cudaMallocHost((void**)&keep_data_host, nms_pre_maxnum * sizeof(long)));
-
-    CHECK_CUDA(cudaMemcpy(nms_rescale_factor_dev, nms_rescale_factor.data(),
-                                        class_num * sizeof(float), cudaMemcpyHostToDevice));
-
-    iou3d_nms.reset(new Iou3dNmsCuda(output_h, output_w, nms_thresh));
-
-
-    for(auto i = 0; i < nms_rescale_factor.size(); i++){
-        printf("%.2f%c", nms_rescale_factor[i], i == nms_rescale_factor.size() - 1 ? '\n' : ' ');
+__global__ void BEVDecodeObjectKernel(
+  const int map_size,        // 40000
+  const float score_thresh,  // 0.1
+                             //    const int nms_pre_max_size, // 4096
+  const float x_start, const float y_start, const float x_step, const float y_step,
+  const int output_h, const int output_w, const int downsample_size, const int num_class_in_task,
+  const int cls_range, const float * reg, const float * hei, const float * dim, const float * rot,
+  const float * vel, const float * cls, float * res_box, float * res_conf, int * res_cls,
+  int * res_box_num, float * rescale_factor)
+{  // 根据置信度，初筛，筛选后有res_box_num个box，不超过nms_pre_max_size 4096
+  int idx = threadIdx.x + blockDim.x * blockIdx.x;
+  if (idx >= map_size) return;
+
+  float max_score = cls[idx];  // 初始化为task的第一个类
+  int label = cls_range;       // 初始化为task的第一个类
+  for (int i = 1; i < num_class_in_task; ++i) {
+    float cur_score = cls[idx + i * map_size];
+    if (cur_score > max_score) {
+      max_score = cur_score;
+      label = i + cls_range;
     }
+  }
+
+  int coor_x = idx % output_h;  //
+  int coor_y = idx / output_w;  //
+
+  float conf = sigmoid_gpu(max_score);  // 计算置信度
+  if (conf > score_thresh) {
+    int cur_valid_box_id = atomicAdd(res_box_num, 1);
+    res_box[cur_valid_box_id * kBoxBlockSize + 0] =
+      (reg[idx + 0 * map_size] + coor_x) * x_step + x_start;
+    res_box[cur_valid_box_id * kBoxBlockSize + 1] =
+      (reg[idx + 1 * map_size] + coor_y) * y_step + y_start;
+    res_box[cur_valid_box_id * kBoxBlockSize + 2] = hei[idx];
+    res_box[cur_valid_box_id * kBoxBlockSize + 3] =
+      expf(dim[idx + 0 * map_size]) * rescale_factor[label];  // nms scale
+    res_box[cur_valid_box_id * kBoxBlockSize + 4] =
+      expf(dim[idx + 1 * map_size]) * rescale_factor[label];
+    res_box[cur_valid_box_id * kBoxBlockSize + 5] =
+      expf(dim[idx + 2 * map_size]) * rescale_factor[label];
+    res_box[cur_valid_box_id * kBoxBlockSize + 6] = atan2f(rot[idx], rot[idx + map_size]);
+    res_box[cur_valid_box_id * kBoxBlockSize + 7] = vel[idx];
+    res_box[cur_valid_box_id * kBoxBlockSize + 8] = vel[idx + map_size];
+
+    res_conf[cur_valid_box_id] = conf;
+    res_cls[cur_valid_box_id] = label;
+  }
+}
 
+PostprocessGPU::PostprocessGPU(
+  const int _class_num, const float _score_thresh, const float _nms_thresh,
+  const int _nms_pre_maxnum, const int _nms_post_maxnum, const int _down_sample,
+  const int _output_h, const int _output_w, const float _x_step, const float _y_step,
+  const float _x_start, const float _y_start, const std::vector<int> & _class_num_pre_task,
+  const std::vector<float> & _nms_rescale_factor)
+: class_num(_class_num),
+  score_thresh(_score_thresh),
+  nms_thresh(_nms_thresh),
+  nms_pre_maxnum(_nms_pre_maxnum),
+  nms_post_maxnum(_nms_post_maxnum),
+  down_sample(_down_sample),
+  output_h(_output_h),
+  output_w(_output_w),
+  x_step(_x_step),
+  y_step(_y_step),
+  x_start(_x_start),
+  y_start(_y_start),
+  map_size(output_h * output_w),
+  class_num_pre_task(_class_num_pre_task),
+  nms_rescale_factor(_nms_rescale_factor),
+  task_num(_class_num_pre_task.size())
+{
+  CHECK_CUDA(cudaMalloc((void **)&boxes_dev, kBoxBlockSize * map_size * sizeof(float)));
+  CHECK_CUDA(cudaMalloc((void **)&score_dev, map_size * sizeof(float)));
+  CHECK_CUDA(cudaMalloc((void **)&cls_dev, map_size * sizeof(int)));
+  CHECK_CUDA(cudaMalloc((void **)&sorted_indices_dev, map_size * sizeof(int)));
+  CHECK_CUDA(cudaMalloc((void **)&valid_box_num, sizeof(int)));
+  CHECK_CUDA(cudaMalloc((void **)&nms_rescale_factor_dev, class_num * sizeof(float)));
+
+  CHECK_CUDA(cudaMallocHost((void **)&boxes_host, kBoxBlockSize * map_size * sizeof(float)));
+  CHECK_CUDA(cudaMallocHost((void **)&score_host, nms_pre_maxnum * sizeof(float)));
+  CHECK_CUDA(cudaMallocHost((void **)&cls_host, map_size * sizeof(float)));
+  CHECK_CUDA(cudaMallocHost((void **)&sorted_indices_host, nms_pre_maxnum * sizeof(int)));
+  CHECK_CUDA(cudaMallocHost((void **)&keep_data_host, nms_pre_maxnum * sizeof(long)));
+
+  CHECK_CUDA(cudaMemcpy(
+    nms_rescale_factor_dev, nms_rescale_factor.data(), class_num * sizeof(float),
+    cudaMemcpyHostToDevice));
+
+  iou3d_nms.reset(new Iou3dNmsCuda(output_h, output_w, nms_thresh));
+
+  for (auto i = 0; i < nms_rescale_factor.size(); i++) {
+    printf("%.2f%c", nms_rescale_factor[i], i == nms_rescale_factor.size() - 1 ? '\n' : ' ');
+  }
 }
-PostprocessGPU::~PostprocessGPU(){
-    CHECK_CUDA(cudaFree(boxes_dev));
-    CHECK_CUDA(cudaFree(score_dev));
-    CHECK_CUDA(cudaFree(cls_dev));
-    CHECK_CUDA(cudaFree(sorted_indices_dev));
-    CHECK_CUDA(cudaFree(valid_box_num));
-    CHECK_CUDA(cudaFree(nms_rescale_factor_dev));
-
-    CHECK_CUDA(cudaFreeHost(boxes_host));
-    CHECK_CUDA(cudaFreeHost(score_host));
-    CHECK_CUDA(cudaFreeHost(cls_host));
-    CHECK_CUDA(cudaFreeHost(sorted_indices_host));
-    CHECK_CUDA(cudaFreeHost(keep_data_host));
+PostprocessGPU::~PostprocessGPU()
+{
+  CHECK_CUDA(cudaFree(boxes_dev));
+  CHECK_CUDA(cudaFree(score_dev));
+  CHECK_CUDA(cudaFree(cls_dev));
+  CHECK_CUDA(cudaFree(sorted_indices_dev));
+  CHECK_CUDA(cudaFree(valid_box_num));
+  CHECK_CUDA(cudaFree(nms_rescale_factor_dev));
+
+  CHECK_CUDA(cudaFreeHost(boxes_host));
+  CHECK_CUDA(cudaFreeHost(score_host));
+  CHECK_CUDA(cudaFreeHost(cls_host));
+  CHECK_CUDA(cudaFreeHost(sorted_indices_host));
+  CHECK_CUDA(cudaFreeHost(keep_data_host));
 }
 
-
-void PostprocessGPU::DoPostprocess(void ** const bev_buffer, std::vector<Box>& out_detections){
-
-    // bev_buffer : BEV_feat, reg_0, hei_0, dim_0, rot_0, vel_0, heatmap_0, reg_1 ...
-    const int task_num = class_num_pre_task.size();
-    int cur_start_label = 0;
-    for(int i = 0; i < task_num; i++){
-        float* reg = (float*)bev_buffer[i * 6 + 0];     // 2 x 128 x 128
-        float* hei = (float*)bev_buffer[i * 6 + 1];     // 1 x 128 x 128
-        float* dim = (float*)bev_buffer[i * 6 + 2];     // 3 x 128 x 128
-        float* rot = (float*)bev_buffer[i * 6 + 3];     // 2 x 128 x 128
-        float* vel = (float*)bev_buffer[i * 6 + 4];     // 2 x 128 x 128
-        float* heatmap = (float*)bev_buffer[i * 6 + 5]; // c x 128 x 128
-
-        dim3 grid(DIVUP(map_size, NUM_THREADS));
-        CHECK_CUDA(cudaMemset(valid_box_num, 0, sizeof(int)));
-        BEVDecodeObjectKernel<<<grid, NUM_THREADS>>>(map_size, score_thresh, 
-                                         x_start, y_start, x_step, y_step, output_h,
-                                         output_w, down_sample, class_num_pre_task[i],
-                                         cur_start_label, reg, hei, dim, rot, 
-                                         vel, 
-                                         heatmap,
-                                         boxes_dev, score_dev, cls_dev, valid_box_num,
-                                         nms_rescale_factor_dev);
-
-        /*
-        此时 boxes_dev, score_dev, cls_dev 有 valid_box_num 个元素，可能大于nms_pre_maxnum, 而且是无序排列的
-        */ 
-        int box_num_pre = 0;
-        CHECK_CUDA(cudaMemcpy(&box_num_pre, valid_box_num, sizeof(int), cudaMemcpyDeviceToHost));
-
-        thrust::sequence(thrust::device, sorted_indices_dev, sorted_indices_dev + box_num_pre);
-        thrust::sort_by_key(thrust::device, score_dev, score_dev + box_num_pre, 
-                            sorted_indices_dev, thrust::greater<float>());
-        // 此时 score_dev 是降序排列的，而 sorted_indices_dev 索引着原顺序，
-        // 即 sorted_indices_dev[i] = j; i:现在的位置，j:原索引;  j:[0, map_size)
-
-
-        box_num_pre = std::min(box_num_pre, nms_pre_maxnum);
-
-        int box_num_post = iou3d_nms->DoIou3dNms(box_num_pre, boxes_dev, 
-                                                        sorted_indices_dev, keep_data_host);
-
-        box_num_post = std::min(box_num_post, nms_post_maxnum);
-
-
-        CHECK_CUDA(cudaMemcpy(sorted_indices_host, sorted_indices_dev, box_num_pre * sizeof(int),
-                                                                    cudaMemcpyDeviceToHost));
-        CHECK_CUDA(cudaMemcpy(boxes_host, boxes_dev, kBoxBlockSize * map_size * sizeof(float),
-                                                                    cudaMemcpyDeviceToHost));
-        CHECK_CUDA(cudaMemcpy(score_host, score_dev, box_num_pre * sizeof(float), 
-                                                                    cudaMemcpyDeviceToHost));
-        CHECK_CUDA(cudaMemcpy(cls_host, cls_dev, map_size * sizeof(float), 
-                                                                    cudaMemcpyDeviceToHost));
-
-
-        for (auto j = 0; j < box_num_post; j++) {
-            int k = keep_data_host[j];
-            int idx = sorted_indices_host[k];
-            Box box;
-            box.x = boxes_host[idx * kBoxBlockSize + 0];
-            box.y = boxes_host[idx * kBoxBlockSize + 1];
-            box.z = boxes_host[idx * kBoxBlockSize + 2];
-            box.l = boxes_host[idx * kBoxBlockSize + 3] / nms_rescale_factor[cls_host[idx]];
-            box.w = boxes_host[idx * kBoxBlockSize + 4] / nms_rescale_factor[cls_host[idx]];
-            box.h = boxes_host[idx * kBoxBlockSize + 5] / nms_rescale_factor[cls_host[idx]];
-            box.r = boxes_host[idx * kBoxBlockSize + 6];
-            box.vx = boxes_host[idx * kBoxBlockSize + 7];
-            box.vy = boxes_host[idx * kBoxBlockSize + 8];
-
-            box.label = cls_host[idx];
-            box.score = score_host[k];
-            box.z -= box.h * 0.5; // bottom height
-            out_detections.push_back(box);
-        }
-        
-        cur_start_label += class_num_pre_task[i];
+void PostprocessGPU::DoPostprocess(void ** const bev_buffer, std::vector<Box> & out_detections)
+{
+  // bev_buffer : BEV_feat, reg_0, hei_0, dim_0, rot_0, vel_0, heatmap_0, reg_1 ...
+  const int task_num = class_num_pre_task.size();
+  int cur_start_label = 0;
+  for (int i = 0; i < task_num; i++) {
+    float * reg = (float *)bev_buffer[i * 6 + 0];      // 2 x 128 x 128
+    float * hei = (float *)bev_buffer[i * 6 + 1];      // 1 x 128 x 128
+    float * dim = (float *)bev_buffer[i * 6 + 2];      // 3 x 128 x 128
+    float * rot = (float *)bev_buffer[i * 6 + 3];      // 2 x 128 x 128
+    float * vel = (float *)bev_buffer[i * 6 + 4];      // 2 x 128 x 128
+    float * heatmap = (float *)bev_buffer[i * 6 + 5];  // c x 128 x 128
+
+    dim3 grid(DIVUP(map_size, NUM_THREADS));
+    CHECK_CUDA(cudaMemset(valid_box_num, 0, sizeof(int)));
+    BEVDecodeObjectKernel<<<grid, NUM_THREADS>>>(
+      map_size, score_thresh, x_start, y_start, x_step, y_step, output_h, output_w, down_sample,
+      class_num_pre_task[i], cur_start_label, reg, hei, dim, rot, vel, heatmap, boxes_dev,
+      score_dev, cls_dev, valid_box_num, nms_rescale_factor_dev);
+
+    /*
+    此时 boxes_dev, score_dev, cls_dev 有 valid_box_num 个元素，可能大于nms_pre_maxnum,
+    而且是无序排列的
+    */
+    int box_num_pre = 0;
+    CHECK_CUDA(cudaMemcpy(&box_num_pre, valid_box_num, sizeof(int), cudaMemcpyDeviceToHost));
+
+    thrust::sequence(thrust::device, sorted_indices_dev, sorted_indices_dev + box_num_pre);
+    thrust::sort_by_key(
+      thrust::device, score_dev, score_dev + box_num_pre, sorted_indices_dev,
+      thrust::greater<float>());
+    // 此时 score_dev 是降序排列的，而 sorted_indices_dev 索引着原顺序，
+    // 即 sorted_indices_dev[i] = j; i:现在的位置，j:原索引;  j:[0, map_size)
+
+    box_num_pre = std::min(box_num_pre, nms_pre_maxnum);
+
+    int box_num_post =
+      iou3d_nms->DoIou3dNms(box_num_pre, boxes_dev, sorted_indices_dev, keep_data_host);
+
+    box_num_post = std::min(box_num_post, nms_post_maxnum);
+
+    CHECK_CUDA(cudaMemcpy(
+      sorted_indices_host, sorted_indices_dev, box_num_pre * sizeof(int), cudaMemcpyDeviceToHost));
+    CHECK_CUDA(cudaMemcpy(
+      boxes_host, boxes_dev, kBoxBlockSize * map_size * sizeof(float), cudaMemcpyDeviceToHost));
+    CHECK_CUDA(
+      cudaMemcpy(score_host, score_dev, box_num_pre * sizeof(float), cudaMemcpyDeviceToHost));
+    CHECK_CUDA(cudaMemcpy(cls_host, cls_dev, map_size * sizeof(float), cudaMemcpyDeviceToHost));
+
+    for (auto j = 0; j < box_num_post; j++) {
+      int k = keep_data_host[j];
+      int idx = sorted_indices_host[k];
+      Box box;
+      box.x = boxes_host[idx * kBoxBlockSize + 0];
+      box.y = boxes_host[idx * kBoxBlockSize + 1];
+      box.z = boxes_host[idx * kBoxBlockSize + 2];
+      box.l = boxes_host[idx * kBoxBlockSize + 3] / nms_rescale_factor[cls_host[idx]];
+      box.w = boxes_host[idx * kBoxBlockSize + 4] / nms_rescale_factor[cls_host[idx]];
+      box.h = boxes_host[idx * kBoxBlockSize + 5] / nms_rescale_factor[cls_host[idx]];
+      box.r = boxes_host[idx * kBoxBlockSize + 6];
+      box.vx = boxes_host[idx * kBoxBlockSize + 7];
+      box.vy = boxes_host[idx * kBoxBlockSize + 8];
+
+      box.label = cls_host[idx];
+      box.score = score_host[k];
+      box.z -= box.h * 0.5;  // bottom height
+      out_detections.push_back(box);
     }
+
+    cur_start_label += class_num_pre_task[i];
+  }
 }
diff --git a/perception/tensorrt_bevdet/src/preprocess.cu b/perception/tensorrt_bevdet/src/preprocess.cu
index b37a7b5a0e89d..2fcb849f58b15 100644
--- a/perception/tensorrt_bevdet/src/preprocess.cu
+++ b/perception/tensorrt_bevdet/src/preprocess.cu
@@ -1,22 +1,25 @@
 #include "common.hpp"
 #include "preprocess.hpp"
+
 #include <thrust/fill.h>
+
 #include <fstream>
 
-__global__ void convert_RGBHWC_to_BGRCHW_kernel(uchar *input, uchar *output, 
-                                                            int channels, int height, int width){
-    int index = blockIdx.x * blockDim.x + threadIdx.x;
-    if(index < channels * height * width){
-        int y = index / 3 / width;
-        int x = index / 3 % width;
-        int c = 2 - index % 3;  // RGB to BGR
+__global__ void convert_RGBHWC_to_BGRCHW_kernel(
+  uchar * input, uchar * output, int channels, int height, int width)
+{
+  int index = blockIdx.x * blockDim.x + threadIdx.x;
+  if (index < channels * height * width) {
+    int y = index / 3 / width;
+    int x = index / 3 % width;
+    int c = 2 - index % 3;  // RGB to BGR
 
-        output[c * height * width + y * width + x] = input[index];
-    }
+    output[c * height * width + y * width + x] = input[index];
+  }
 }
 // RGBHWC to BGRCHW
-void convert_RGBHWC_to_BGRCHW(uchar *input, uchar *output, 
-                                                        int channels, int height, int width){
-    convert_RGBHWC_to_BGRCHW_kernel<<<DIVUP(channels * height * width, NUM_THREADS), NUM_THREADS>>>
-                                                            (input, output, channels, height, width);
-}
\ No newline at end of file
+void convert_RGBHWC_to_BGRCHW(uchar * input, uchar * output, int channels, int height, int width)
+{
+  convert_RGBHWC_to_BGRCHW_kernel<<<DIVUP(channels * height * width, NUM_THREADS), NUM_THREADS>>>(
+    input, output, channels, height, width);
+}
diff --git a/perception/tensorrt_bevdet/src/preprocess_plugin.cu b/perception/tensorrt_bevdet/src/preprocess_plugin.cu
index e23c0af29c2a0..0eea36251c782 100644
--- a/perception/tensorrt_bevdet/src/preprocess_plugin.cu
+++ b/perception/tensorrt_bevdet/src/preprocess_plugin.cu
@@ -14,344 +14,334 @@
  * limitations under the License.
  */
 
+#include "common.hpp"
 #include "preprocess_plugin.hpp"
+
 #include <cuda_fp16.h>
 #include <cuda_runtime_api.h>
 
-#include "common.hpp"
 #include <iostream>
 
 // kernel for GPU
 
-template<typename T>
-__global__ void preprocess_kernel(const uint8_t * src_dev, 
-                                T* dst_dev, 
-                                int src_row_step, 
-                                int dst_row_step, 
-                                int src_img_step, 
-                                int dst_img_step,
-                                int src_h, 
-                                int src_w, 
-                                float radio_h, 
-                                float radio_w, 
-                                float offset_h, 
-                                float offset_w, 
-                                const float * mean, 
-                                const float * std,
-                                int dst_h,
-                                int dst_w,
-                                int n){
-
-    int idx = blockIdx.x * blockDim.x + threadIdx.x;
-    if(idx >= dst_h * dst_w * n) return;
-    
-    int i = (idx / n) / dst_w;
-    int j = (idx / n) % dst_w;
-    int k = idx % n;
-
-	int pX = (int) roundf((i / radio_h) + offset_h);
-	int pY = (int) roundf((j / radio_w) + offset_w);
- 
-	if (pX < src_h && pX >= 0 && pY < src_w && pY >= 0){
-        int s1 = k * src_img_step + 0 * src_img_step / 3 + pX * src_row_step + pY;
-        int s2 = k * src_img_step + 1 * src_img_step / 3 + pX * src_row_step + pY;
-        int s3 = k * src_img_step + 2 * src_img_step / 3 + pX * src_row_step + pY;
-
-        int d1 = k * dst_img_step + 0 * dst_img_step / 3 + i * dst_row_step + j;
-        int d2 = k * dst_img_step + 1 * dst_img_step / 3 + i * dst_row_step + j;
-        int d3 = k * dst_img_step + 2 * dst_img_step / 3 + i * dst_row_step + j;
-
-		dst_dev[d1] = (static_cast<T>(src_dev[s1]) - static_cast<T>(mean[0])) / static_cast<T>(std[0]);
-		dst_dev[d2] = (static_cast<T>(src_dev[s2]) - static_cast<T>(mean[1])) / static_cast<T>(std[1]);
-		dst_dev[d3] = (static_cast<T>(src_dev[s3]) - static_cast<T>(mean[2])) / static_cast<T>(std[2]);
-	}
-}
+template <typename T>
+__global__ void preprocess_kernel(
+  const uint8_t * src_dev, T * dst_dev, int src_row_step, int dst_row_step, int src_img_step,
+  int dst_img_step, int src_h, int src_w, float radio_h, float radio_w, float offset_h,
+  float offset_w, const float * mean, const float * std, int dst_h, int dst_w, int n)
+{
+  int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (idx >= dst_h * dst_w * n) return;
+
+  int i = (idx / n) / dst_w;
+  int j = (idx / n) % dst_w;
+  int k = idx % n;
+
+  int pX = (int)roundf((i / radio_h) + offset_h);
+  int pY = (int)roundf((j / radio_w) + offset_w);
+
+  if (pX < src_h && pX >= 0 && pY < src_w && pY >= 0) {
+    int s1 = k * src_img_step + 0 * src_img_step / 3 + pX * src_row_step + pY;
+    int s2 = k * src_img_step + 1 * src_img_step / 3 + pX * src_row_step + pY;
+    int s3 = k * src_img_step + 2 * src_img_step / 3 + pX * src_row_step + pY;
 
+    int d1 = k * dst_img_step + 0 * dst_img_step / 3 + i * dst_row_step + j;
+    int d2 = k * dst_img_step + 1 * dst_img_step / 3 + i * dst_row_step + j;
+    int d3 = k * dst_img_step + 2 * dst_img_step / 3 + i * dst_row_step + j;
+
+    dst_dev[d1] = (static_cast<T>(src_dev[s1]) - static_cast<T>(mean[0])) / static_cast<T>(std[0]);
+    dst_dev[d2] = (static_cast<T>(src_dev[s2]) - static_cast<T>(mean[1])) / static_cast<T>(std[1]);
+    dst_dev[d3] = (static_cast<T>(src_dev[s3]) - static_cast<T>(mean[2])) / static_cast<T>(std[2]);
+  }
+}
 
 namespace nvinfer1
 {
 // class PreprocessPlugin
-PreprocessPlugin::PreprocessPlugin(const std::string &name, int crop_h, int crop_w, float resize_radio):
-    name_(name){
-    m_.crop_h = crop_h;
-    m_.crop_w = crop_w;
-    m_.resize_radio = resize_radio;
-}
-
-PreprocessPlugin::PreprocessPlugin(const std::string &name, const void *buffer, size_t length):
-    name_(name){
-    memcpy(&m_, buffer, sizeof(m_));
-}
-
-PreprocessPlugin::~PreprocessPlugin(){
-}
-
-IPluginV2DynamicExt *PreprocessPlugin::clone() const noexcept {
-    auto p = new PreprocessPlugin(name_, &m_, sizeof(m_));
-    p->setPluginNamespace(namespace_.c_str());
-    return p;
-}
-
-int32_t PreprocessPlugin::getNbOutputs() const noexcept {
-    return 1;
-}
- 
-DataType PreprocessPlugin::getOutputDataType(int32_t index, DataType const *inputTypes, 
-                                                                int32_t nbInputs) const noexcept {
-    // return DataType::kHALF;
-    return DataType::kFLOAT;
-}
-
-DimsExprs PreprocessPlugin::getOutputDimensions(int32_t outputIndex, const DimsExprs *inputs, 
-                                        int32_t nbInputs, IExprBuilder &exprBuilder) noexcept {
-    int input_h = inputs[0].d[2]->getConstantValue();
-    int input_w = inputs[0].d[3]->getConstantValue() * 4;
-
-    int output_h = input_h * m_.resize_radio - m_.crop_h;
-    int output_w = input_w * m_.resize_radio - m_.crop_w;
-
-    DimsExprs ret;
-    ret.nbDims = inputs[0].nbDims;
-    ret.d[0] = inputs[0].d[0];
-    ret.d[1] = inputs[0].d[1];
-    ret.d[2] =  exprBuilder.constant(output_h);
-    ret.d[3] =  exprBuilder.constant(output_w);
-    
-    return ret; 
-}
-
-bool PreprocessPlugin::supportsFormatCombination(int32_t pos, const PluginTensorDesc *inOut,
-                                                    int32_t nbInputs, int32_t nbOutputs) noexcept {
-    bool res;
-    switch (pos)
-    {
-    case 0: // images
-        res = (inOut[0].type == DataType::kINT8 || inOut[0].type == DataType::kINT32) && 
-                inOut[0].format == TensorFormat::kLINEAR;
-        break;
-    case 1: // mean
-        res = (inOut[1].type == DataType::kFLOAT) &&
-                inOut[1].format == TensorFormat::kLINEAR;
-        break;
-    case 2: // std
-        res = (inOut[2].type == DataType::kFLOAT) &&
-                inOut[2].format == TensorFormat::kLINEAR;
-        break;
-    case 3: // 输出 img tensor
-        res = (inOut[3].type == DataType::kFLOAT || inOut[3].type == DataType::kHALF) && 
-                inOut[3].format == inOut[0].format;
-
-        // res = inOut[3].type == DataType::kHALF && inOut[3].format == inOut[0].format;
-        break;
-    default: 
-        res = false;
-    }
-    return res;
+PreprocessPlugin::PreprocessPlugin(
+  const std::string & name, int crop_h, int crop_w, float resize_radio)
+: name_(name)
+{
+  m_.crop_h = crop_h;
+  m_.crop_w = crop_w;
+  m_.resize_radio = resize_radio;
 }
 
-void PreprocessPlugin::configurePlugin(const DynamicPluginTensorDesc *in, int32_t nbInputs, 
-                                    const DynamicPluginTensorDesc *out, int32_t nbOutputs) noexcept {
-    return;
+PreprocessPlugin::PreprocessPlugin(const std::string & name, const void * buffer, size_t length)
+: name_(name)
+{
+  memcpy(&m_, buffer, sizeof(m_));
 }
 
-size_t PreprocessPlugin::getWorkspaceSize(const PluginTensorDesc *inputs, int32_t nbInputs, 
-                                const PluginTensorDesc *outputs, int32_t nbOutputs) const noexcept {
-    return 0;
+PreprocessPlugin::~PreprocessPlugin()
+{
 }
 
-int32_t PreprocessPlugin::enqueue(const PluginTensorDesc *inputDesc, const PluginTensorDesc *outputDesc,
-    const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept {
-    
-    int n_img = inputDesc[0].dims.d[0];
-    int src_img_h = inputDesc[0].dims.d[2];
-    int src_img_w = inputDesc[0].dims.d[3] * 4;
+IPluginV2DynamicExt * PreprocessPlugin::clone() const noexcept
+{
+  auto p = new PreprocessPlugin(name_, &m_, sizeof(m_));
+  p->setPluginNamespace(namespace_.c_str());
+  return p;
+}
 
-    int dst_img_h = outputDesc[0].dims.d[2];
-    int dst_img_w = outputDesc[0].dims.d[3];
+int32_t PreprocessPlugin::getNbOutputs() const noexcept
+{
+  return 1;
+}
 
-    int src_row_step = src_img_w;
-    int dst_row_step = dst_img_w;
+DataType PreprocessPlugin::getOutputDataType(
+  int32_t index, DataType const * inputTypes, int32_t nbInputs) const noexcept
+{
+  // return DataType::kHALF;
+  return DataType::kFLOAT;
+}
 
-    int src_img_step = src_img_w * src_img_h * 3;
-    int dst_img_step = dst_img_w * dst_img_h * 3;
+DimsExprs PreprocessPlugin::getOutputDimensions(
+  int32_t outputIndex, const DimsExprs * inputs, int32_t nbInputs,
+  IExprBuilder & exprBuilder) noexcept
+{
+  int input_h = inputs[0].d[2]->getConstantValue();
+  int input_w = inputs[0].d[3]->getConstantValue() * 4;
 
-    float offset_h = m_.crop_h / m_.resize_radio;
-    float offset_w = m_.crop_w / m_.resize_radio;
+  int output_h = input_h * m_.resize_radio - m_.crop_h;
+  int output_w = input_w * m_.resize_radio - m_.crop_w;
 
-    dim3 grid(DIVUP(dst_img_h * dst_img_w * n_img,  NUM_THREADS));
-    dim3 block(NUM_THREADS);
+  DimsExprs ret;
+  ret.nbDims = inputs[0].nbDims;
+  ret.d[0] = inputs[0].d[0];
+  ret.d[1] = inputs[0].d[1];
+  ret.d[2] = exprBuilder.constant(output_h);
+  ret.d[3] = exprBuilder.constant(output_w);
 
-    switch (int(outputDesc[0].type))
-    {
-    case int(DataType::kFLOAT):
-        // printf("pre : float\n");
-        preprocess_kernel<<<grid, block, 0, stream>>>(
-                                                reinterpret_cast<const uint8_t *>(inputs[0]),
-                                                reinterpret_cast<float *>(outputs[0]),
-                                                src_row_step, 
-                                                dst_row_step, 
-                                                src_img_step,
-                                                dst_img_step, 
-                                                src_img_h, 
-                                                src_img_w, 
-                                                m_.resize_radio,
-                                                m_.resize_radio, 
-                                                offset_h, 
-                                                offset_w, 
-                                                reinterpret_cast<const float *>(inputs[1]), 
-                                                reinterpret_cast<const float *>(inputs[2]),
-                                                dst_img_h, 
-                                                dst_img_w,
-                                                n_img);
-        break;
-    case int(DataType::kHALF):
-        // printf("pre : half\n");
-        preprocess_kernel<<<grid, block, 0, stream>>>(
-                                                reinterpret_cast<const uint8_t *>(inputs[0]),
-                                                reinterpret_cast<__half *>(outputs[0]),
-                                                src_row_step, 
-                                                dst_row_step, 
-                                                src_img_step,
-                                                dst_img_step, 
-                                                src_img_h, 
-                                                src_img_w, 
-                                                m_.resize_radio,
-                                                m_.resize_radio, 
-                                                offset_h, 
-                                                offset_w, 
-                                                reinterpret_cast<const float *>(inputs[1]), 
-                                                reinterpret_cast<const float *>(inputs[2]),
-                                                dst_img_h, 
-                                                dst_img_w,
-                                                n_img);
-
-        break;
-    default: // should NOT be here
-        printf("\tUnsupport datatype!\n");
-    }
-    return 0;
+  return ret;
 }
 
-void PreprocessPlugin::destroy() noexcept {
-    delete this;
-    return;
+bool PreprocessPlugin::supportsFormatCombination(
+  int32_t pos, const PluginTensorDesc * inOut, int32_t nbInputs, int32_t nbOutputs) noexcept
+{
+  bool res;
+  switch (pos) {
+    case 0:  // images
+      res = (inOut[0].type == DataType::kINT8 || inOut[0].type == DataType::kINT32) &&
+            inOut[0].format == TensorFormat::kLINEAR;
+      break;
+    case 1:  // mean
+      res = (inOut[1].type == DataType::kFLOAT) && inOut[1].format == TensorFormat::kLINEAR;
+      break;
+    case 2:  // std
+      res = (inOut[2].type == DataType::kFLOAT) && inOut[2].format == TensorFormat::kLINEAR;
+      break;
+    case 3:  // 输出 img tensor
+      res = (inOut[3].type == DataType::kFLOAT || inOut[3].type == DataType::kHALF) &&
+            inOut[3].format == inOut[0].format;
+
+      // res = inOut[3].type == DataType::kHALF && inOut[3].format == inOut[0].format;
+      break;
+    default:
+      res = false;
+  }
+  return res;
+}
+
+void PreprocessPlugin::configurePlugin(
+  const DynamicPluginTensorDesc * in, int32_t nbInputs, const DynamicPluginTensorDesc * out,
+  int32_t nbOutputs) noexcept
+{
+  return;
 }
 
-int32_t PreprocessPlugin::initialize() noexcept {
-    return 0;
+size_t PreprocessPlugin::getWorkspaceSize(
+  const PluginTensorDesc * inputs, int32_t nbInputs, const PluginTensorDesc * outputs,
+  int32_t nbOutputs) const noexcept
+{
+  return 0;
 }
 
-void PreprocessPlugin::terminate() noexcept {
-    return;
+int32_t PreprocessPlugin::enqueue(
+  const PluginTensorDesc * inputDesc, const PluginTensorDesc * outputDesc,
+  const void * const * inputs, void * const * outputs, void * workspace,
+  cudaStream_t stream) noexcept
+{
+  int n_img = inputDesc[0].dims.d[0];
+  int src_img_h = inputDesc[0].dims.d[2];
+  int src_img_w = inputDesc[0].dims.d[3] * 4;
+
+  int dst_img_h = outputDesc[0].dims.d[2];
+  int dst_img_w = outputDesc[0].dims.d[3];
+
+  int src_row_step = src_img_w;
+  int dst_row_step = dst_img_w;
+
+  int src_img_step = src_img_w * src_img_h * 3;
+  int dst_img_step = dst_img_w * dst_img_h * 3;
+
+  float offset_h = m_.crop_h / m_.resize_radio;
+  float offset_w = m_.crop_w / m_.resize_radio;
+
+  dim3 grid(DIVUP(dst_img_h * dst_img_w * n_img, NUM_THREADS));
+  dim3 block(NUM_THREADS);
+
+  switch (int(outputDesc[0].type)) {
+    case int(DataType::kFLOAT):
+      // printf("pre : float\n");
+      preprocess_kernel<<<grid, block, 0, stream>>>(
+        reinterpret_cast<const uint8_t *>(inputs[0]), reinterpret_cast<float *>(outputs[0]),
+        src_row_step, dst_row_step, src_img_step, dst_img_step, src_img_h, src_img_w,
+        m_.resize_radio, m_.resize_radio, offset_h, offset_w,
+        reinterpret_cast<const float *>(inputs[1]), reinterpret_cast<const float *>(inputs[2]),
+        dst_img_h, dst_img_w, n_img);
+      break;
+    case int(DataType::kHALF):
+      // printf("pre : half\n");
+      preprocess_kernel<<<grid, block, 0, stream>>>(
+        reinterpret_cast<const uint8_t *>(inputs[0]), reinterpret_cast<__half *>(outputs[0]),
+        src_row_step, dst_row_step, src_img_step, dst_img_step, src_img_h, src_img_w,
+        m_.resize_radio, m_.resize_radio, offset_h, offset_w,
+        reinterpret_cast<const float *>(inputs[1]), reinterpret_cast<const float *>(inputs[2]),
+        dst_img_h, dst_img_w, n_img);
+
+      break;
+    default:  // should NOT be here
+      printf("\tUnsupport datatype!\n");
+  }
+  return 0;
+}
+
+void PreprocessPlugin::destroy() noexcept
+{
+  delete this;
+  return;
+}
+
+int32_t PreprocessPlugin::initialize() noexcept
+{
+  return 0;
 }
 
-size_t PreprocessPlugin::getSerializationSize() const noexcept {
-    return sizeof(m_);
+void PreprocessPlugin::terminate() noexcept
+{
+  return;
 }
 
-void PreprocessPlugin::serialize(void *buffer) const noexcept {
-    memcpy(buffer, &m_, sizeof(m_));
-    return;
+size_t PreprocessPlugin::getSerializationSize() const noexcept
+{
+  return sizeof(m_);
 }
 
-void PreprocessPlugin::setPluginNamespace(const char *pluginNamespace) noexcept {
-    namespace_ = pluginNamespace;
-    return;
+void PreprocessPlugin::serialize(void * buffer) const noexcept
+{
+  memcpy(buffer, &m_, sizeof(m_));
+  return;
 }
 
-const char *PreprocessPlugin::getPluginNamespace() const noexcept {
-    return namespace_.c_str();
+void PreprocessPlugin::setPluginNamespace(const char * pluginNamespace) noexcept
+{
+  namespace_ = pluginNamespace;
+  return;
 }
 
-const char *PreprocessPlugin::getPluginType() const noexcept {
-    return PRE_PLUGIN_NAME;
+const char * PreprocessPlugin::getPluginNamespace() const noexcept
+{
+  return namespace_.c_str();
 }
 
-const char *PreprocessPlugin::getPluginVersion() const noexcept {
-    return PRE_PLUGIN_VERSION;
+const char * PreprocessPlugin::getPluginType() const noexcept
+{
+  return PRE_PLUGIN_NAME;
 }
 
-void PreprocessPlugin::attachToContext(cudnnContext *contextCudnn, cublasContext *contextCublas, 
-                                                        IGpuAllocator *gpuAllocator) noexcept {
-    return;
+const char * PreprocessPlugin::getPluginVersion() const noexcept
+{
+  return PRE_PLUGIN_VERSION;
 }
 
-void PreprocessPlugin::detachFromContext() noexcept {
-    return;
+void PreprocessPlugin::attachToContext(
+  cudnnContext * contextCudnn, cublasContext * contextCublas, IGpuAllocator * gpuAllocator) noexcept
+{
+  return;
+}
+
+void PreprocessPlugin::detachFromContext() noexcept
+{
+  return;
 }
 
 // class PreprocessPluginCreator
-PluginFieldCollection    PreprocessPluginCreator::fc_ {};
+PluginFieldCollection PreprocessPluginCreator::fc_{};
 std::vector<PluginField> PreprocessPluginCreator::attr_;
 
-PreprocessPluginCreator::PreprocessPluginCreator() {
-    
-    attr_.clear();
-    attr_.emplace_back(PluginField("crop_h", nullptr, PluginFieldType::kINT32, 1));
-    attr_.emplace_back(PluginField("crop_w", nullptr, PluginFieldType::kINT32, 1));
-    attr_.emplace_back(PluginField("resize_radio", nullptr, PluginFieldType::kFLOAT32, 1));
+PreprocessPluginCreator::PreprocessPluginCreator()
+{
+  attr_.clear();
+  attr_.emplace_back(PluginField("crop_h", nullptr, PluginFieldType::kINT32, 1));
+  attr_.emplace_back(PluginField("crop_w", nullptr, PluginFieldType::kINT32, 1));
+  attr_.emplace_back(PluginField("resize_radio", nullptr, PluginFieldType::kFLOAT32, 1));
 
-    fc_.nbFields = attr_.size();
-    fc_.fields   = attr_.data();
+  fc_.nbFields = attr_.size();
+  fc_.fields = attr_.data();
 }
 
-PreprocessPluginCreator::~PreprocessPluginCreator() {
+PreprocessPluginCreator::~PreprocessPluginCreator()
+{
 }
 
-
-IPluginV2DynamicExt *PreprocessPluginCreator::createPlugin(const char *name, 
-                                    const PluginFieldCollection *fc) noexcept {
-    
-    const PluginField *fields = fc->fields;
-
-    int crop_h = -1;
-    int crop_w = -1;
-    float resize_radio = 0.f;
-
-    for (int i = 0; i < fc->nbFields; ++i){
-        if(std::string(fc->fields[i].name) == std::string("crop_h")){
-            crop_h = *reinterpret_cast<const int *>(fc->fields[i].data);
-        }
-        else if(std::string(fc->fields[i].name) == std::string("crop_w")){
-            crop_w = *reinterpret_cast<const int *>(fc->fields[i].data);
-        }
-        else if(std::string(fc->fields[i].name) == std::string("resize_radio")){
-            resize_radio = *reinterpret_cast<const float *>(fc->fields[i].data);
-        }
+IPluginV2DynamicExt * PreprocessPluginCreator::createPlugin(
+  const char * name, const PluginFieldCollection * fc) noexcept
+{
+  const PluginField * fields = fc->fields;
+
+  int crop_h = -1;
+  int crop_w = -1;
+  float resize_radio = 0.f;
+
+  for (int i = 0; i < fc->nbFields; ++i) {
+    if (std::string(fc->fields[i].name) == std::string("crop_h")) {
+      crop_h = *reinterpret_cast<const int *>(fc->fields[i].data);
+    } else if (std::string(fc->fields[i].name) == std::string("crop_w")) {
+      crop_w = *reinterpret_cast<const int *>(fc->fields[i].data);
+    } else if (std::string(fc->fields[i].name) == std::string("resize_radio")) {
+      resize_radio = *reinterpret_cast<const float *>(fc->fields[i].data);
     }
-    PreprocessPlugin *pObj = new PreprocessPlugin(name, crop_h, crop_w, resize_radio);
-    pObj->setPluginNamespace(namespace_.c_str());
-    return pObj;
+  }
+  PreprocessPlugin * pObj = new PreprocessPlugin(name, crop_h, crop_w, resize_radio);
+  pObj->setPluginNamespace(namespace_.c_str());
+  return pObj;
 }
 
-IPluginV2DynamicExt *PreprocessPluginCreator::deserializePlugin(const char *name, 
-                                        const void *serialData, size_t serialLength) noexcept {
-    PreprocessPlugin *pObj = new PreprocessPlugin(name, serialData, serialLength);
-    pObj->setPluginNamespace(namespace_.c_str());
-    return pObj;
+IPluginV2DynamicExt * PreprocessPluginCreator::deserializePlugin(
+  const char * name, const void * serialData, size_t serialLength) noexcept
+{
+  PreprocessPlugin * pObj = new PreprocessPlugin(name, serialData, serialLength);
+  pObj->setPluginNamespace(namespace_.c_str());
+  return pObj;
 }
 
-void PreprocessPluginCreator::setPluginNamespace(const char *pluginNamespace) noexcept {
-    namespace_ = pluginNamespace;
-    return;
+void PreprocessPluginCreator::setPluginNamespace(const char * pluginNamespace) noexcept
+{
+  namespace_ = pluginNamespace;
+  return;
 }
 
-const char *PreprocessPluginCreator::getPluginNamespace() const noexcept {
-    return namespace_.c_str();
+const char * PreprocessPluginCreator::getPluginNamespace() const noexcept
+{
+  return namespace_.c_str();
 }
 
-const char *PreprocessPluginCreator::getPluginName() const noexcept {
-    return PRE_PLUGIN_NAME;
+const char * PreprocessPluginCreator::getPluginName() const noexcept
+{
+  return PRE_PLUGIN_NAME;
 }
 
-const char *PreprocessPluginCreator::getPluginVersion() const noexcept {
-    return PRE_PLUGIN_VERSION;
+const char * PreprocessPluginCreator::getPluginVersion() const noexcept
+{
+  return PRE_PLUGIN_VERSION;
 }
 
-const PluginFieldCollection *PreprocessPluginCreator::getFieldNames() noexcept {
-    return &fc_;
+const PluginFieldCollection * PreprocessPluginCreator::getFieldNames() noexcept
+{
+  return &fc_;
 }
 
 REGISTER_TENSORRT_PLUGIN(PreprocessPluginCreator);
 
-} // namespace nvinfer1
+}  // namespace nvinfer1

From f17c6b0f4fb7c40daf4c1cb29ae46e61da42a596 Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Fri, 12 Jul 2024 10:15:23 +0800
Subject: [PATCH 03/85] feat(tensorrt_bevdet): add new 3d object detection
 method

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 perception/tensorrt_bevdet/CMakeLists.txt          | 1 -
 perception/tensorrt_bevdet/include/bevdet_node.hpp | 3 ---
 2 files changed, 4 deletions(-)

diff --git a/perception/tensorrt_bevdet/CMakeLists.txt b/perception/tensorrt_bevdet/CMakeLists.txt
index d50811bce1898..72de3a4ee4e05 100644
--- a/perception/tensorrt_bevdet/CMakeLists.txt
+++ b/perception/tensorrt_bevdet/CMakeLists.txt
@@ -96,7 +96,6 @@ install(TARGETS ${PROJECT_NAME}_node
 install(DIRECTORY 
   launch
   config
-  data
   DESTINATION share/${PROJECT_NAME}/
 )
 
diff --git a/perception/tensorrt_bevdet/include/bevdet_node.hpp b/perception/tensorrt_bevdet/include/bevdet_node.hpp
index 832ac87d382b5..da8232e19d9ae 100644
--- a/perception/tensorrt_bevdet/include/bevdet_node.hpp
+++ b/perception/tensorrt_bevdet/include/bevdet_node.hpp
@@ -39,9 +39,6 @@
 #include <message_filters/sync_policies/approximate_time.h>
 
 
-using std::chrono::duration;
-using std::chrono::high_resolution_clock;
-
 typedef pcl::PointXYZI PointT;
 
 uint8_t getSemanticType(const std::string & class_name);

From eca8ed0998bce9f403e1e7df7aa42cc58191e50c Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 12 Jul 2024 02:58:00 +0000
Subject: [PATCH 04/85] style(pre-commit): autofix

---
 perception/tensorrt_bevdet/include/iou3d_nms.hpp   |  3 ++-
 perception/tensorrt_bevdet/include/postprocess.hpp |  5 +++--
 perception/tensorrt_bevdet/src/iou3d_nms.cu        | 10 +++++++---
 perception/tensorrt_bevdet/src/postprocess.cu      |  7 ++++---
 4 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/perception/tensorrt_bevdet/include/iou3d_nms.hpp b/perception/tensorrt_bevdet/include/iou3d_nms.hpp
index 2d7963d9e70c1..e58337f4ca2b4 100644
--- a/perception/tensorrt_bevdet/include/iou3d_nms.hpp
+++ b/perception/tensorrt_bevdet/include/iou3d_nms.hpp
@@ -5,6 +5,8 @@ All Rights Reserved 2019-2022.
 */
 #pragma once
 
+#include "common.hpp"
+
 #include <stdio.h>
 #include <thrust/count.h>
 #include <thrust/device_vector.h>
@@ -16,7 +18,6 @@ All Rights Reserved 2019-2022.
 
 #include <iostream>
 #include <vector>
-#include "common.hpp"
 
 const int THREADS_PER_BLOCK = 16;
 const int THREADS_PER_BLOCK_NMS = sizeof(unsigned long long) * 8;
diff --git a/perception/tensorrt_bevdet/include/postprocess.hpp b/perception/tensorrt_bevdet/include/postprocess.hpp
index 5fc27a36a8def..149b1c10dd78d 100644
--- a/perception/tensorrt_bevdet/include/postprocess.hpp
+++ b/perception/tensorrt_bevdet/include/postprocess.hpp
@@ -1,9 +1,10 @@
 #ifndef POSTPROCESS_HPP_
 #define POSTPROCESS_HPP_
 
-#include <vector>
-#include "iou3d_nms.hpp"
 #include "common.hpp"
+#include "iou3d_nms.hpp"
+
+#include <vector>
 
 class PostprocessGPU
 {
diff --git a/perception/tensorrt_bevdet/src/iou3d_nms.cu b/perception/tensorrt_bevdet/src/iou3d_nms.cu
index 9d736fcd3e6bd..9a0b035184973 100644
--- a/perception/tensorrt_bevdet/src/iou3d_nms.cu
+++ b/perception/tensorrt_bevdet/src/iou3d_nms.cu
@@ -451,7 +451,8 @@ __global__ void RotatedNmsWithIndicesKernel(
     int start = 0;
     if (row_start == col_start) {
       start = threadIdx.x + 1;  // Isn't it right now to compare with [0~threadIdx. x)? FIXME
-    }  // I think this is correct, and there is no need to compare it with numbers smaller than my own. If col is smaller than row, there is also no need to compare it
+    }  // I think this is correct, and there is no need to compare it with numbers smaller than my
+       // own. If col is smaller than row, there is also no need to compare it
     for (i = start; i < col_size; i++) {
       if (iou_bev(cur_box, block_boxes + i * kBoxBlockSize) > nms_overlap_thresh) {
         t |= 1ULL << i;
@@ -599,9 +600,12 @@ int Iou3dNmsCuda::DoIou3dNms(
   for (int i = 0; i < box_num_pre; i++) {
     int nblock = i / THREADS_PER_BLOCK_NMS;
     int inblock = i % THREADS_PER_BLOCK_NMS;
-    if (!(host_remv[nblock] & (1ULL << inblock))) {  // Meeting this requirement indicates that it does not conflict with any of the boxes selected earlier
+    if (!(host_remv[nblock] &
+          (1ULL << inblock))) {  // Meeting this requirement indicates that it does not conflict
+                                 // with any of the boxes selected earlier
       host_keep_data[num_to_keep++] = i;  // Add the satisfied box
-      for (int j = nblock; j < col_blocks; j++) {  // Consider boxes with index i that overlap beyond the threshold
+      for (int j = nblock; j < col_blocks;
+           j++) {  // Consider boxes with index i that overlap beyond the threshold
         host_remv[j] |= host_mask[i * col_blocks + j];
       }
     }
diff --git a/perception/tensorrt_bevdet/src/postprocess.cu b/perception/tensorrt_bevdet/src/postprocess.cu
index ca95a4d32e19c..46a2a26a230d4 100644
--- a/perception/tensorrt_bevdet/src/postprocess.cu
+++ b/perception/tensorrt_bevdet/src/postprocess.cu
@@ -20,7 +20,8 @@ __global__ void BEVDecodeObjectKernel(
   const int cls_range, const float * reg, const float * hei, const float * dim, const float * rot,
   const float * vel, const float * cls, float * res_box, float * res_conf, int * res_cls,
   int * res_box_num, float * rescale_factor)
-{  // According to the confidence level, the initial screening showed that there were res-box_num boxes after screening, not exceeding nms_pre_max_size 4096
+{  // According to the confidence level, the initial screening showed that there were res-box_num
+   // boxes after screening, not exceeding nms_pre_max_size 4096
   int idx = threadIdx.x + blockDim.x * blockIdx.x;
   if (idx >= map_size) return;
 
@@ -153,8 +154,8 @@ void PostprocessGPU::DoPostprocess(void ** const bev_buffer, std::vector<Box> &
     thrust::sort_by_key(
       thrust::device, score_dev, score_dev + box_num_pre, sorted_indices_dev,
       thrust::greater<float>());
-    // score_dev is sorted in descending order, while sorted_indices_dev indexes the original order,，
-    // sorted_indices_dev[i] = j; i:current，j:pre-index;  j:[0, map_size]
+    // score_dev is sorted in descending order, while sorted_indices_dev indexes the original
+    // order,， sorted_indices_dev[i] = j; i:current，j:pre-index;  j:[0, map_size]
 
     box_num_pre = std::min(box_num_pre, nms_pre_maxnum);
 

From acb33f0497e91fe4e9a3d833a262ef33783c802b Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Fri, 19 Jul 2024 13:48:14 +0800
Subject: [PATCH 05/85] feat(tensorrt_bevdet): add new 3d object detection
 method

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 .../tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml  | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/perception/tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml b/perception/tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml
index 1cfe340596966..a0d14545201b3 100644
--- a/perception/tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml
+++ b/perception/tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml
@@ -11,11 +11,11 @@
   <arg name="output/pointcloud" default="points_painting"/>
   <arg name="output/boxes" default="boxes"/>
 
-  <arg name="onnx_file" default="$(find-pkg-share trt_bevdet)/data/bevdet_one_lt_d.onnx"/>
-  <arg name="engine_file" default="$(find-pkg-share trt_bevdet)/data/bevdet_one_lt_d.engine"/>
-  <arg name="cam_config" default="$(find-pkg-share trt_bevdet)/config/cams_param.yaml"/>
-  <arg name="model_config" default="$(find-pkg-share trt_bevdet)/config/bevdet_r50_4dlongterm_depth.yaml"/>
-  <arg name="param_file" default="$(find-pkg-share trt_bevdet)/config/bevdet.param.yaml"/>
+  <arg name="onnx_file" default="$(find-pkg-share tensorrt_bevdet)/data/bevdet_one_lt_d.onnx"/>
+  <arg name="engine_file" default="$(find-pkg-share tensorrt_bevdet)/data/bevdet_one_lt_d.engine"/>
+  <arg name="cam_config" default="$(find-pkg-share tensorrt_bevdet)/config/cams_param.yaml"/>
+  <arg name="model_config" default="$(find-pkg-share tensorrt_bevdet)/config/bevdet_r50_4dlongterm_depth.yaml"/>
+  <arg name="param_file" default="$(find-pkg-share tensorrt_bevdet)/config/bevdet.param.yaml"/>
 
   <!-- bevdet node -->
   <node pkg="tensorrt_bevdet" exec="tensorrt_bevdet_node" name="tensorrt_bevdet_node" output="screen">

From 3baff540b7df13b3d2786c7855686d8b21317805 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 19 Jul 2024 06:08:42 +0000
Subject: [PATCH 06/85] style(pre-commit): autofix

---
 perception/tensorrt_bevdet/include/iou3d_nms.hpp   | 3 ++-
 perception/tensorrt_bevdet/include/postprocess.hpp | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/perception/tensorrt_bevdet/include/iou3d_nms.hpp b/perception/tensorrt_bevdet/include/iou3d_nms.hpp
index 2d7963d9e70c1..e58337f4ca2b4 100644
--- a/perception/tensorrt_bevdet/include/iou3d_nms.hpp
+++ b/perception/tensorrt_bevdet/include/iou3d_nms.hpp
@@ -5,6 +5,8 @@ All Rights Reserved 2019-2022.
 */
 #pragma once
 
+#include "common.hpp"
+
 #include <stdio.h>
 #include <thrust/count.h>
 #include <thrust/device_vector.h>
@@ -16,7 +18,6 @@ All Rights Reserved 2019-2022.
 
 #include <iostream>
 #include <vector>
-#include "common.hpp"
 
 const int THREADS_PER_BLOCK = 16;
 const int THREADS_PER_BLOCK_NMS = sizeof(unsigned long long) * 8;
diff --git a/perception/tensorrt_bevdet/include/postprocess.hpp b/perception/tensorrt_bevdet/include/postprocess.hpp
index 2efbcf08f5fd8..184d881f304df 100644
--- a/perception/tensorrt_bevdet/include/postprocess.hpp
+++ b/perception/tensorrt_bevdet/include/postprocess.hpp
@@ -1,9 +1,10 @@
 #ifndef POSTPROCESS_HPP_
 #define POSTPROCESS_HPP_
 
-#include <vector>
 #include "iou3d_nms.hpp"
 
+#include <vector>
+
 class PostprocessGPU
 {
 public:

From 0dac36134be52047f3092440749ceba98e850e44 Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Tue, 23 Jul 2024 17:45:30 +0800
Subject: [PATCH 07/85] feat(tensorrt_bevdet): add new 3d object detection
 method

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 .../detector/camera_bev_detector.launch.xml    | 11 +++++++++--
 .../include/alignbev_plugin.hpp                |  3 +++
 perception/tensorrt_bevdet/include/bevdet.hpp  | 13 +++++++++++++
 .../tensorrt_bevdet/include/bevdet_node.hpp    | 18 ++++++++++++++++++
 .../tensorrt_bevdet/include/bevpool_plugin.hpp |  3 +++
 perception/tensorrt_bevdet/include/common.hpp  | 13 +++++++++++++
 .../include/cpu_jpegdecoder.hpp                | 13 +++++++++++++
 perception/tensorrt_bevdet/include/data.hpp    | 13 +++++++++++++
 .../include/gatherbev_plugin.hpp               |  3 +++
 .../tensorrt_bevdet/include/iou3d_nms.hpp      | 13 +++++++++++++
 .../tensorrt_bevdet/include/nvjpegdecoder.hpp  | 13 +++++++++++++
 .../tensorrt_bevdet/include/postprocess.hpp    | 13 +++++++++++++
 .../tensorrt_bevdet/include/preprocess.hpp     | 13 +++++++++++++
 .../include/preprocess_plugin.hpp              |  3 +++
 .../launch/tensorrt_bevdet.launch.xml          |  5 +++--
 perception/tensorrt_bevdet/src/bevdet.cpp      | 15 ++++++++++++++-
 perception/tensorrt_bevdet/src/bevdet_node.cpp | 13 +++++++++++++
 .../tensorrt_bevdet/src/cpu_jpegdecoder.cpp    | 13 +++++++++++++
 perception/tensorrt_bevdet/src/data.cpp        | 15 ++++++++++++++-
 perception/tensorrt_bevdet/src/iou3d_nms.cu    | 13 +++++++++++++
 .../tensorrt_bevdet/src/nvjpegdecoder.cpp      | 13 +++++++++++++
 perception/tensorrt_bevdet/src/postprocess.cu  | 13 +++++++++++++
 perception/tensorrt_bevdet/src/preprocess.cu   | 13 +++++++++++++
 23 files changed, 252 insertions(+), 6 deletions(-)

diff --git a/launch/tier4_perception_launch/launch/object_recognition/detection/detector/camera_bev_detector.launch.xml b/launch/tier4_perception_launch/launch/object_recognition/detection/detector/camera_bev_detector.launch.xml
index 6bcab50c3527e..069ef73a127e5 100644
--- a/launch/tier4_perception_launch/launch/object_recognition/detection/detector/camera_bev_detector.launch.xml
+++ b/launch/tier4_perception_launch/launch/object_recognition/detection/detector/camera_bev_detector.launch.xml
@@ -21,8 +21,12 @@
   <arg name="input/camera7/image"/>
   <arg name="input/camera7/info"/>
   <arg name="number_of_cameras"/>
-  <arg name="bev_detection_model" default="bevdet" description="options: `bevdet`"/>
 
+  <!-- Model parameters -->
+  <arg name="bev_detection_model" default="bevdet" description="options: `bevdet`"/>
+  <arg name="data_path" default="$(env HOME)/autoware_data/tensorrt_bevdet" description="packages data and artifacts directory path"/>
+  <arg name="onnx_file" default="$(var data_path)/bevdet_one_lt_d.onnx" description="onnx model file path"/>
+  <arg name="engine_file" default="$(var data_path)/bevdet_one_lt_d.engine" description="engine model file path"/>
   <!-- BEVDet -->
   <group if="$(eval &quot;'$(var bev_detection_model)'=='bevdet'&quot;)">
     <push-ros-namespace namespace="bevdet"/>
@@ -36,7 +40,10 @@
         <arg name="input/img_b" value="$(var input/camera4/image)"/>
         <arg name="input/img_br" value="$(var input/camera5/image)"/>
         <arg name="output/pointcloud" value="points_painting"/>
-        <arg name="output/pointcboxesloud" value="boxes"/>
+        <arg name="output/boxes" value="boxes"/>
+        <arg name="data_path" value="$(var data_path)/"/>
+        <arg name="onnx_file" value="$(var onnx_file)"/>
+        <arg name="engine_file" value="$(var engine_file)"/>
       </include>
     </group>
   </group>
diff --git a/perception/tensorrt_bevdet/include/alignbev_plugin.hpp b/perception/tensorrt_bevdet/include/alignbev_plugin.hpp
index a91d43cf6ce28..e782253e7b7fa 100644
--- a/perception/tensorrt_bevdet/include/alignbev_plugin.hpp
+++ b/perception/tensorrt_bevdet/include/alignbev_plugin.hpp
@@ -13,6 +13,8 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+#ifndef ALIGNBEV_PLUGIN_HPP_
+#define ALIGNBEV_PLUGIN_HPP_
 
 #include <NvInfer.h>
 #include <unistd.h>
@@ -114,3 +116,4 @@ class AlignBEVPluginCreator : public IPluginCreator
 };
 
 }  // namespace nvinfer1
+#endif  //ALIGNBEV_PLUGIN_HPP_
\ No newline at end of file
diff --git a/perception/tensorrt_bevdet/include/bevdet.hpp b/perception/tensorrt_bevdet/include/bevdet.hpp
index ec873b2f4286f..68b1f8142b527 100644
--- a/perception/tensorrt_bevdet/include/bevdet.hpp
+++ b/perception/tensorrt_bevdet/include/bevdet.hpp
@@ -1,3 +1,16 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #ifndef BEVDET_HPP_
 #define BEVDET_HPP_
 
diff --git a/perception/tensorrt_bevdet/include/bevdet_node.hpp b/perception/tensorrt_bevdet/include/bevdet_node.hpp
index be63b4c204c4c..f34355bf146ba 100644
--- a/perception/tensorrt_bevdet/include/bevdet_node.hpp
+++ b/perception/tensorrt_bevdet/include/bevdet_node.hpp
@@ -1,3 +1,19 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef BEVDET_NODE_HPP_
+#define BEVDET_NODE_HPP_
+
 #include "bevdet.hpp"
 #include "cpu_jpegdecoder.hpp"
 
@@ -125,3 +141,5 @@ class TRTBEVDetNode : public rclcpp::Node
     const sensor_msgs::msg::Image::ConstSharedPtr & msg_b_img,
     const sensor_msgs::msg::Image::ConstSharedPtr & msg_br_img);
 };
+
+#endif  //BEVDET_NODE_HPP_
\ No newline at end of file
diff --git a/perception/tensorrt_bevdet/include/bevpool_plugin.hpp b/perception/tensorrt_bevdet/include/bevpool_plugin.hpp
index 2d41fdae42389..8ffdf55813099 100644
--- a/perception/tensorrt_bevdet/include/bevpool_plugin.hpp
+++ b/perception/tensorrt_bevdet/include/bevpool_plugin.hpp
@@ -13,6 +13,8 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+#ifndef BEVPOOL_PLUGIN_HPP_
+#define BEVPOOL_PLUGIN_HPP_
 
 #include <NvInfer.h>
 #include <unistd.h>
@@ -118,3 +120,4 @@ class BEVPoolPluginCreator : public IPluginCreator
 };
 
 }  // namespace nvinfer1
+#endif  //BEVPOOL_PLUGIN_HPP_
diff --git a/perception/tensorrt_bevdet/include/common.hpp b/perception/tensorrt_bevdet/include/common.hpp
index 5ee37cf3920f6..67a25c8a77565 100644
--- a/perception/tensorrt_bevdet/include/common.hpp
+++ b/perception/tensorrt_bevdet/include/common.hpp
@@ -1,3 +1,16 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #ifndef COMMON_HPP_
 #define COMMON_HPP_
 
diff --git a/perception/tensorrt_bevdet/include/cpu_jpegdecoder.hpp b/perception/tensorrt_bevdet/include/cpu_jpegdecoder.hpp
index 8be620940717b..3544212bf2c68 100644
--- a/perception/tensorrt_bevdet/include/cpu_jpegdecoder.hpp
+++ b/perception/tensorrt_bevdet/include/cpu_jpegdecoder.hpp
@@ -1,3 +1,16 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #ifndef CPU_JPEGDECODER_HPP_
 #define CPU_JPEGDECODER_HPP_
 
diff --git a/perception/tensorrt_bevdet/include/data.hpp b/perception/tensorrt_bevdet/include/data.hpp
index dfbb75dceeadd..2cfb122ddc91b 100644
--- a/perception/tensorrt_bevdet/include/data.hpp
+++ b/perception/tensorrt_bevdet/include/data.hpp
@@ -1,3 +1,16 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #ifndef DATA_HPP_
 #define DATA_HPP_
 
diff --git a/perception/tensorrt_bevdet/include/gatherbev_plugin.hpp b/perception/tensorrt_bevdet/include/gatherbev_plugin.hpp
index 6da17a2ac28c2..48c08564ba770 100644
--- a/perception/tensorrt_bevdet/include/gatherbev_plugin.hpp
+++ b/perception/tensorrt_bevdet/include/gatherbev_plugin.hpp
@@ -13,6 +13,8 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+#ifndef GATHERBEV_PLUGIN_HPP_
+#define GATHERBEV_PLUGIN_HPP_
 
 #include <NvInfer.h>
 #include <unistd.h>
@@ -115,3 +117,4 @@ class GatherBEVPluginCreator : public IPluginCreator
 };
 
 }  // namespace nvinfer1
+#endif  //GATHERBEV_PLUGIN_HPP_
\ No newline at end of file
diff --git a/perception/tensorrt_bevdet/include/iou3d_nms.hpp b/perception/tensorrt_bevdet/include/iou3d_nms.hpp
index 2d7963d9e70c1..e539fa21b04c6 100644
--- a/perception/tensorrt_bevdet/include/iou3d_nms.hpp
+++ b/perception/tensorrt_bevdet/include/iou3d_nms.hpp
@@ -1,3 +1,16 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 /*
 3D IoU Calculation and Rotated NMS(modified from 2D NMS written by others)
 Written by Shaoshuai Shi
diff --git a/perception/tensorrt_bevdet/include/nvjpegdecoder.hpp b/perception/tensorrt_bevdet/include/nvjpegdecoder.hpp
index 4e727893f259c..8988bc3e2ad08 100644
--- a/perception/tensorrt_bevdet/include/nvjpegdecoder.hpp
+++ b/perception/tensorrt_bevdet/include/nvjpegdecoder.hpp
@@ -1,3 +1,16 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #ifndef NVJPEGDECODER_HPP_
 #define NVJPEGDECODER_HPP_
 
diff --git a/perception/tensorrt_bevdet/include/postprocess.hpp b/perception/tensorrt_bevdet/include/postprocess.hpp
index 2efbcf08f5fd8..fa9940b3781ad 100644
--- a/perception/tensorrt_bevdet/include/postprocess.hpp
+++ b/perception/tensorrt_bevdet/include/postprocess.hpp
@@ -1,3 +1,16 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #ifndef POSTPROCESS_HPP_
 #define POSTPROCESS_HPP_
 
diff --git a/perception/tensorrt_bevdet/include/preprocess.hpp b/perception/tensorrt_bevdet/include/preprocess.hpp
index 7f8b2cef96781..3abe82844655c 100644
--- a/perception/tensorrt_bevdet/include/preprocess.hpp
+++ b/perception/tensorrt_bevdet/include/preprocess.hpp
@@ -1,3 +1,16 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #ifndef PREPROCESS_HPP_
 #define PREPROCESS_HPP_
 
diff --git a/perception/tensorrt_bevdet/include/preprocess_plugin.hpp b/perception/tensorrt_bevdet/include/preprocess_plugin.hpp
index bd0ff8bf0b8c2..58e02cd7760bd 100644
--- a/perception/tensorrt_bevdet/include/preprocess_plugin.hpp
+++ b/perception/tensorrt_bevdet/include/preprocess_plugin.hpp
@@ -13,6 +13,8 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+#ifndef PREPROCESS_PLUGIN_HPP_
+#define PREPROCESS_PLUGIN_HPP_
 
 #include <NvInfer.h>
 #include <unistd.h>
@@ -117,3 +119,4 @@ class PreprocessPluginCreator : public IPluginCreator
 };
 
 }  // namespace nvinfer1
+#endif //PREPROCESS_PLUGIN_HPP_
\ No newline at end of file
diff --git a/perception/tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml b/perception/tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml
index a0d14545201b3..0e55056962039 100644
--- a/perception/tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml
+++ b/perception/tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml
@@ -11,8 +11,9 @@
   <arg name="output/pointcloud" default="points_painting"/>
   <arg name="output/boxes" default="boxes"/>
 
-  <arg name="onnx_file" default="$(find-pkg-share tensorrt_bevdet)/data/bevdet_one_lt_d.onnx"/>
-  <arg name="engine_file" default="$(find-pkg-share tensorrt_bevdet)/data/bevdet_one_lt_d.engine"/>
+  <arg name="data_path" default="$(env HOME)/autoware_data/tensorrt_bevdet"/>
+  <arg name="onnx_file" default="$(var data_path)/bevdet_one_lt_d.onnx"/>
+  <arg name="engine_file" default="$(var data_path)/bevdet_one_lt_d.engine"/>
   <arg name="cam_config" default="$(find-pkg-share tensorrt_bevdet)/config/cams_param.yaml"/>
   <arg name="model_config" default="$(find-pkg-share tensorrt_bevdet)/config/bevdet_r50_4dlongterm_depth.yaml"/>
   <arg name="param_file" default="$(find-pkg-share tensorrt_bevdet)/config/bevdet.param.yaml"/>
diff --git a/perception/tensorrt_bevdet/src/bevdet.cpp b/perception/tensorrt_bevdet/src/bevdet.cpp
index c258e2dd6db02..674bca66328b0 100644
--- a/perception/tensorrt_bevdet/src/bevdet.cpp
+++ b/perception/tensorrt_bevdet/src/bevdet.cpp
@@ -1,3 +1,16 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #include "bevdet.hpp"
 
 #include "alignbev_plugin.hpp"
@@ -164,7 +177,7 @@ void BEVDet::InitParams(const std::string & config_file)
   std::vector<std::vector<float>> nms_factor_temp =
     model_config["test_cfg"]["nms_rescale_factor"].as<std::vector<std::vector<float>>>();
   nms_rescale_factor.clear();
-  for (auto task_factors : nms_factor_temp) {
+  for (const auto& task_factors : nms_factor_temp) {
     for (float factor : task_factors) {
       nms_rescale_factor.push_back(factor);
     }
diff --git a/perception/tensorrt_bevdet/src/bevdet_node.cpp b/perception/tensorrt_bevdet/src/bevdet_node.cpp
index 8d2acc87c848f..182dad76645b2 100644
--- a/perception/tensorrt_bevdet/src/bevdet_node.cpp
+++ b/perception/tensorrt_bevdet/src/bevdet_node.cpp
@@ -1,3 +1,16 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #include "bevdet_node.hpp"
 
 using Label = autoware_perception_msgs::msg::ObjectClassification;
diff --git a/perception/tensorrt_bevdet/src/cpu_jpegdecoder.cpp b/perception/tensorrt_bevdet/src/cpu_jpegdecoder.cpp
index 1205e6fe71bdd..e5443a7e33869 100644
--- a/perception/tensorrt_bevdet/src/cpu_jpegdecoder.cpp
+++ b/perception/tensorrt_bevdet/src/cpu_jpegdecoder.cpp
@@ -1,3 +1,16 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #include "cpu_jpegdecoder.hpp"
 
 #include "preprocess.hpp"
diff --git a/perception/tensorrt_bevdet/src/data.cpp b/perception/tensorrt_bevdet/src/data.cpp
index b22ac49d7c209..35f720b201eae 100644
--- a/perception/tensorrt_bevdet/src/data.cpp
+++ b/perception/tensorrt_bevdet/src/data.cpp
@@ -1,3 +1,16 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #include "data.hpp"
 
 #include "cpu_jpegdecoder.hpp"
@@ -34,7 +47,7 @@ camParams::camParams(const YAML::Node & config, int n, std::vector<std::string>
   cams2ego_rot.clear();
   cams2ego_trans.clear();
 
-  for (std::string name : cams_name) {
+  for (const std::string& name : cams_name) {
     imgs_file.push_back("." + config["cams"][name]["data_path"].as<std::string>());
 
     //
diff --git a/perception/tensorrt_bevdet/src/iou3d_nms.cu b/perception/tensorrt_bevdet/src/iou3d_nms.cu
index 9a0b035184973..2c0f214f9d823 100644
--- a/perception/tensorrt_bevdet/src/iou3d_nms.cu
+++ b/perception/tensorrt_bevdet/src/iou3d_nms.cu
@@ -1,3 +1,16 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 /*
 3D IoU Calculation and Rotated NMS(modified from 2D NMS written by others)
 Written by Shaoshuai Shi
diff --git a/perception/tensorrt_bevdet/src/nvjpegdecoder.cpp b/perception/tensorrt_bevdet/src/nvjpegdecoder.cpp
index f8987f9529965..cf344ebe8b38a 100644
--- a/perception/tensorrt_bevdet/src/nvjpegdecoder.cpp
+++ b/perception/tensorrt_bevdet/src/nvjpegdecoder.cpp
@@ -1,3 +1,16 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #ifdef __HAVE_NVJPEG__
 
 #include "nvjpegdecoder.hpp"
diff --git a/perception/tensorrt_bevdet/src/postprocess.cu b/perception/tensorrt_bevdet/src/postprocess.cu
index 46a2a26a230d4..8a237dfbcd098 100644
--- a/perception/tensorrt_bevdet/src/postprocess.cu
+++ b/perception/tensorrt_bevdet/src/postprocess.cu
@@ -1,3 +1,16 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #include "postprocess.hpp"
 
 #include <thrust/device_vector.h>
diff --git a/perception/tensorrt_bevdet/src/preprocess.cu b/perception/tensorrt_bevdet/src/preprocess.cu
index 2fcb849f58b15..a457817d9601c 100644
--- a/perception/tensorrt_bevdet/src/preprocess.cu
+++ b/perception/tensorrt_bevdet/src/preprocess.cu
@@ -1,3 +1,16 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #include "common.hpp"
 #include "preprocess.hpp"
 

From 3149fd3d496029fd5dd439410b4b451e389d4edf Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 23 Jul 2024 09:54:53 +0000
Subject: [PATCH 08/85] style(pre-commit): autofix

---
 perception/tensorrt_bevdet/include/alignbev_plugin.hpp   | 2 +-
 perception/tensorrt_bevdet/include/bevdet_node.hpp       | 2 +-
 perception/tensorrt_bevdet/include/bevpool_plugin.hpp    | 2 +-
 perception/tensorrt_bevdet/include/gatherbev_plugin.hpp  | 2 +-
 perception/tensorrt_bevdet/include/iou3d_nms.hpp         | 7 ++++---
 perception/tensorrt_bevdet/include/preprocess_plugin.hpp | 2 +-
 perception/tensorrt_bevdet/src/bevdet.cpp                | 2 +-
 perception/tensorrt_bevdet/src/data.cpp                  | 2 +-
 8 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/perception/tensorrt_bevdet/include/alignbev_plugin.hpp b/perception/tensorrt_bevdet/include/alignbev_plugin.hpp
index e782253e7b7fa..94352c8ee3ba6 100644
--- a/perception/tensorrt_bevdet/include/alignbev_plugin.hpp
+++ b/perception/tensorrt_bevdet/include/alignbev_plugin.hpp
@@ -116,4 +116,4 @@ class AlignBEVPluginCreator : public IPluginCreator
 };
 
 }  // namespace nvinfer1
-#endif  //ALIGNBEV_PLUGIN_HPP_
\ No newline at end of file
+#endif  // ALIGNBEV_PLUGIN_HPP_
diff --git a/perception/tensorrt_bevdet/include/bevdet_node.hpp b/perception/tensorrt_bevdet/include/bevdet_node.hpp
index f34355bf146ba..64d6953b807d0 100644
--- a/perception/tensorrt_bevdet/include/bevdet_node.hpp
+++ b/perception/tensorrt_bevdet/include/bevdet_node.hpp
@@ -142,4 +142,4 @@ class TRTBEVDetNode : public rclcpp::Node
     const sensor_msgs::msg::Image::ConstSharedPtr & msg_br_img);
 };
 
-#endif  //BEVDET_NODE_HPP_
\ No newline at end of file
+#endif  // BEVDET_NODE_HPP_
diff --git a/perception/tensorrt_bevdet/include/bevpool_plugin.hpp b/perception/tensorrt_bevdet/include/bevpool_plugin.hpp
index 8ffdf55813099..a79a58b000f71 100644
--- a/perception/tensorrt_bevdet/include/bevpool_plugin.hpp
+++ b/perception/tensorrt_bevdet/include/bevpool_plugin.hpp
@@ -120,4 +120,4 @@ class BEVPoolPluginCreator : public IPluginCreator
 };
 
 }  // namespace nvinfer1
-#endif  //BEVPOOL_PLUGIN_HPP_
+#endif  // BEVPOOL_PLUGIN_HPP_
diff --git a/perception/tensorrt_bevdet/include/gatherbev_plugin.hpp b/perception/tensorrt_bevdet/include/gatherbev_plugin.hpp
index 48c08564ba770..9e5c5280e78d7 100644
--- a/perception/tensorrt_bevdet/include/gatherbev_plugin.hpp
+++ b/perception/tensorrt_bevdet/include/gatherbev_plugin.hpp
@@ -117,4 +117,4 @@ class GatherBEVPluginCreator : public IPluginCreator
 };
 
 }  // namespace nvinfer1
-#endif  //GATHERBEV_PLUGIN_HPP_
\ No newline at end of file
+#endif  // GATHERBEV_PLUGIN_HPP_
diff --git a/perception/tensorrt_bevdet/include/iou3d_nms.hpp b/perception/tensorrt_bevdet/include/iou3d_nms.hpp
index 47de5fa9c77ca..cdf8ee1216687 100644
--- a/perception/tensorrt_bevdet/include/iou3d_nms.hpp
+++ b/perception/tensorrt_bevdet/include/iou3d_nms.hpp
@@ -20,6 +20,8 @@ All Rights Reserved 2019-2022.
 #ifndef IOU3D_NMS_HPP_
 #define IOU3D_NMS_HPP_
 
+#include "common.hpp"
+
 #include <stdio.h>
 #include <thrust/count.h>
 #include <thrust/device_vector.h>
@@ -28,11 +30,10 @@ All Rights Reserved 2019-2022.
 #include <thrust/sequence.h>
 #include <thrust/sort.h>
 #include <thrust/transform.h>
+
 #include <iostream>
 #include <vector>
 
-#include "common.hpp"
-
 const int THREADS_PER_BLOCK = 16;
 const int THREADS_PER_BLOCK_NMS = sizeof(unsigned long long) * 8;
 const float EPS = 1e-8;
@@ -70,4 +71,4 @@ struct Box
   int label;
   bool is_drop;  // for nms
 };
-#endif //IOU3D_NMS_HPP_
+#endif  // IOU3D_NMS_HPP_
diff --git a/perception/tensorrt_bevdet/include/preprocess_plugin.hpp b/perception/tensorrt_bevdet/include/preprocess_plugin.hpp
index 58e02cd7760bd..2c40864b61259 100644
--- a/perception/tensorrt_bevdet/include/preprocess_plugin.hpp
+++ b/perception/tensorrt_bevdet/include/preprocess_plugin.hpp
@@ -119,4 +119,4 @@ class PreprocessPluginCreator : public IPluginCreator
 };
 
 }  // namespace nvinfer1
-#endif //PREPROCESS_PLUGIN_HPP_
\ No newline at end of file
+#endif  // PREPROCESS_PLUGIN_HPP_
diff --git a/perception/tensorrt_bevdet/src/bevdet.cpp b/perception/tensorrt_bevdet/src/bevdet.cpp
index 674bca66328b0..dbb26c5870f89 100644
--- a/perception/tensorrt_bevdet/src/bevdet.cpp
+++ b/perception/tensorrt_bevdet/src/bevdet.cpp
@@ -177,7 +177,7 @@ void BEVDet::InitParams(const std::string & config_file)
   std::vector<std::vector<float>> nms_factor_temp =
     model_config["test_cfg"]["nms_rescale_factor"].as<std::vector<std::vector<float>>>();
   nms_rescale_factor.clear();
-  for (const auto& task_factors : nms_factor_temp) {
+  for (const auto & task_factors : nms_factor_temp) {
     for (float factor : task_factors) {
       nms_rescale_factor.push_back(factor);
     }
diff --git a/perception/tensorrt_bevdet/src/data.cpp b/perception/tensorrt_bevdet/src/data.cpp
index 35f720b201eae..fa4188488d04c 100644
--- a/perception/tensorrt_bevdet/src/data.cpp
+++ b/perception/tensorrt_bevdet/src/data.cpp
@@ -47,7 +47,7 @@ camParams::camParams(const YAML::Node & config, int n, std::vector<std::string>
   cams2ego_rot.clear();
   cams2ego_trans.clear();
 
-  for (const std::string& name : cams_name) {
+  for (const std::string & name : cams_name) {
     imgs_file.push_back("." + config["cams"][name]["data_path"].as<std::string>());
 
     //

From 956fd286923124fb4aaf1e2f904290b7ed0ad257 Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Wed, 31 Jul 2024 18:29:08 +0800
Subject: [PATCH 09/85] feat(tensorrt_bevdet): add new 3d object detection
 method

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 .../config/bevdet_r50_4dlongterm_depth.yaml   |  1 +
 perception/tensorrt_bevdet/include/bevdet.hpp |  6 +--
 perception/tensorrt_bevdet/include/common.hpp | 12 +++---
 perception/tensorrt_bevdet/include/data.hpp   |  4 +-
 .../include/gatherbev_plugin.hpp              |  2 +-
 .../tensorrt_bevdet/include/iou3d_nms.hpp     |  7 ++--
 .../tensorrt_bevdet/include/nvjpegdecoder.hpp |  2 +-
 .../tensorrt_bevdet/include/postprocess.hpp   |  4 +-
 perception/tensorrt_bevdet/src/bevdet.cpp     | 42 +++++++++----------
 .../tensorrt_bevdet/src/bevdet_node.cpp       |  2 +-
 .../tensorrt_bevdet/src/cpu_jpegdecoder.cpp   |  4 +-
 perception/tensorrt_bevdet/src/data.cpp       |  8 ++--
 perception/tensorrt_bevdet/src/iou3d_nms.cu   | 30 ++++++-------
 .../tensorrt_bevdet/src/nvjpegdecoder.cpp     | 18 ++++----
 perception/tensorrt_bevdet/src/postprocess.cu |  2 +-
 15 files changed, 70 insertions(+), 74 deletions(-)

diff --git a/perception/tensorrt_bevdet/config/bevdet_r50_4dlongterm_depth.yaml b/perception/tensorrt_bevdet/config/bevdet_r50_4dlongterm_depth.yaml
index d9f16eee7a851..8f0e4ac785a97 100644
--- a/perception/tensorrt_bevdet/config/bevdet_r50_4dlongterm_depth.yaml
+++ b/perception/tensorrt_bevdet/config/bevdet_r50_4dlongterm_depth.yaml
@@ -10,6 +10,7 @@ sampling: nearest # nearest # or bicubic
 data_config:
   Ncams: 6
   cams: [CAM_FRONT_LEFT, CAM_FRONT, CAM_FRONT_RIGHT, CAM_BACK_LEFT, CAM_BACK, CAM_BACK_RIGHT]
+  resize_radio: 0.44
   crop: [140, 0]
   flip: true
   input_size: [256, 704]
diff --git a/perception/tensorrt_bevdet/include/bevdet.hpp b/perception/tensorrt_bevdet/include/bevdet.hpp
index 68b1f8142b527..0f97035f7ca18 100644
--- a/perception/tensorrt_bevdet/include/bevdet.hpp
+++ b/perception/tensorrt_bevdet/include/bevdet.hpp
@@ -37,7 +37,7 @@ class adjFrame
   adjFrame(int _n, size_t _buf_size)
   : n(_n), buf_size(_buf_size), scenes_token(_n), ego2global_rot(_n), ego2global_trans(_n)
   {
-    CHECK_CUDA(cudaMalloc((void **)&adj_buffer, _n * _buf_size));
+    CHECK_CUDA(cudaMalloc(reinterpret_cast<void **>(&adj_buffer), _n * _buf_size));
     CHECK_CUDA(cudaMemset(adj_buffer, 0, _n * _buf_size));
     last = 0;
     buffer_num = 0;
@@ -68,7 +68,7 @@ class adjFrame
     while (iters--) {
       last = (last + 1) % n;
       CHECK_CUDA(cudaMemcpy(
-        (char *)adj_buffer + last * buf_size, curr_buffer, buf_size, cudaMemcpyDeviceToDevice));
+        reinterpret_cast<char *>(adj_buffer) + last * buf_size, curr_buffer, buf_size, cudaMemcpyDeviceToDevice));
       scenes_token[last] = curr_token;
       ego2global_rot[last] = _ego2global_rot;
       ego2global_trans[last] = _ego2global_trans;
@@ -81,7 +81,7 @@ class adjFrame
   const void * getFrameBuffer(int idx)
   {
     idx = (-idx + last + n) % n;
-    return (char *)adj_buffer + idx * buf_size;
+    return reinterpret_cast<char *>(adj_buffer) + idx * buf_size;
   }
   void getEgo2Global(
     int idx, Eigen::Quaternion<float> & adj_ego2global_rot,
diff --git a/perception/tensorrt_bevdet/include/common.hpp b/perception/tensorrt_bevdet/include/common.hpp
index 67a25c8a77565..5142600e7523b 100644
--- a/perception/tensorrt_bevdet/include/common.hpp
+++ b/perception/tensorrt_bevdet/include/common.hpp
@@ -80,16 +80,16 @@ __inline__ std::string dataTypeToString(nvinfer1::DataType dataType)
 
 __inline__ size_t dataTypeToSize(nvinfer1::DataType dataType)
 {
-  switch ((int)dataType) {
-    case int(nvinfer1::DataType::kFLOAT):
+  switch (dataType) {
+    case nvinfer1::DataType::kFLOAT:
       return 4;
-    case int(nvinfer1::DataType::kHALF):
+    case nvinfer1::DataType::kHALF:
       return 2;
-    case int(nvinfer1::DataType::kINT8):
+    case nvinfer1::DataType::kINT8:
       return 1;
-    case int(nvinfer1::DataType::kINT32):
+    case nvinfer1::DataType::kINT32:
       return 4;
-    case int(nvinfer1::DataType::kBOOL):
+    case nvinfer1::DataType::kBOOL:
       return 1;
     default:
       return 4;
diff --git a/perception/tensorrt_bevdet/include/data.hpp b/perception/tensorrt_bevdet/include/data.hpp
index 2cfb122ddc91b..0dbf95ef3e124 100644
--- a/perception/tensorrt_bevdet/include/data.hpp
+++ b/perception/tensorrt_bevdet/include/data.hpp
@@ -43,14 +43,14 @@ struct camParams
   //
   std::vector<std::string> imgs_file;
 
-  unsigned long long timestamp;
+  std::int64_t timestamp;
   std::string scene_token;
 };
 
 struct camsData
 {
   camsData() = default;
-  camsData(const camParams & _param) : param(_param), imgs_dev(nullptr) {};
+  explicit camsData(const camParams & _param) : param(_param), imgs_dev(nullptr) {}
   camParams param;
   uchar * imgs_dev;
 };
diff --git a/perception/tensorrt_bevdet/include/gatherbev_plugin.hpp b/perception/tensorrt_bevdet/include/gatherbev_plugin.hpp
index 9e5c5280e78d7..af9ac003aa2ef 100644
--- a/perception/tensorrt_bevdet/include/gatherbev_plugin.hpp
+++ b/perception/tensorrt_bevdet/include/gatherbev_plugin.hpp
@@ -44,7 +44,7 @@ class GatherBEVPlugin : public IPluginV2DynamicExt
 
 public:
   GatherBEVPlugin() = delete;
-  GatherBEVPlugin(const std::string & name);
+  explicit GatherBEVPlugin(const std::string & name);
   GatherBEVPlugin(const std::string & name, const void * buffer, size_t length);
   ~GatherBEVPlugin();
 
diff --git a/perception/tensorrt_bevdet/include/iou3d_nms.hpp b/perception/tensorrt_bevdet/include/iou3d_nms.hpp
index cdf8ee1216687..c5c8870c4b457 100644
--- a/perception/tensorrt_bevdet/include/iou3d_nms.hpp
+++ b/perception/tensorrt_bevdet/include/iou3d_nms.hpp
@@ -20,8 +20,6 @@ All Rights Reserved 2019-2022.
 #ifndef IOU3D_NMS_HPP_
 #define IOU3D_NMS_HPP_
 
-#include "common.hpp"
-
 #include <stdio.h>
 #include <thrust/count.h>
 #include <thrust/device_vector.h>
@@ -33,9 +31,10 @@ All Rights Reserved 2019-2022.
 
 #include <iostream>
 #include <vector>
+#include "common.hpp"
 
 const int THREADS_PER_BLOCK = 16;
-const int THREADS_PER_BLOCK_NMS = sizeof(unsigned long long) * 8;
+const int THREADS_PER_BLOCK_NMS = sizeof(std::int64_t) * 8;
 const float EPS = 1e-8;
 
 class Iou3dNmsCuda
@@ -46,7 +45,7 @@ class Iou3dNmsCuda
 
   int DoIou3dNms(
     const int box_num_pre, const float * res_box, const int * res_sorted_indices,
-    long * host_keep_data);
+    std::int64_t * host_keep_data);
 
 private:
   const int head_x_size_;
diff --git a/perception/tensorrt_bevdet/include/nvjpegdecoder.hpp b/perception/tensorrt_bevdet/include/nvjpegdecoder.hpp
index 8988bc3e2ad08..e5eca93bd5843 100644
--- a/perception/tensorrt_bevdet/include/nvjpegdecoder.hpp
+++ b/perception/tensorrt_bevdet/include/nvjpegdecoder.hpp
@@ -70,7 +70,7 @@ struct decode_params_t
 struct share_params
 {
   share_params() { fmt = NVJPEG_OUTPUT_UNCHANGED; }
-  share_params(size_t _fmt)
+  explicit share_params(size_t _fmt)
   {
     if (
       _fmt == DECODE_RGB || _fmt == DECODE_BGR || _fmt == DECODE_RGBI || _fmt == DECODE_BGRI ||
diff --git a/perception/tensorrt_bevdet/include/postprocess.hpp b/perception/tensorrt_bevdet/include/postprocess.hpp
index 98c75bad96a16..0d60dd77eaa07 100644
--- a/perception/tensorrt_bevdet/include/postprocess.hpp
+++ b/perception/tensorrt_bevdet/include/postprocess.hpp
@@ -21,7 +21,7 @@
 class PostprocessGPU
 {
 public:
-  PostprocessGPU() {};
+  PostprocessGPU() {}
   PostprocessGPU(
     const int _class_num, const float _score_thresh, const float _nms_thresh,
     const int _nms_pre_maxnum, const int _nms_post_maxnum, const int _down_sample,
@@ -58,7 +58,7 @@ class PostprocessGPU
   int * cls_dev = nullptr;
   int * valid_box_num = nullptr;
   int * sorted_indices_dev = nullptr;
-  long * keep_data_host = nullptr;
+  std::int64_t * keep_data_host = nullptr;
   int * sorted_indices_host = nullptr;
   float * boxes_host = nullptr;
   float * score_host = nullptr;
diff --git a/perception/tensorrt_bevdet/src/bevdet.cpp b/perception/tensorrt_bevdet/src/bevdet.cpp
index dbb26c5870f89..de99bc6c0b1b4 100644
--- a/perception/tensorrt_bevdet/src/bevdet.cpp
+++ b/perception/tensorrt_bevdet/src/bevdet.cpp
@@ -143,6 +143,7 @@ void BEVDet::InitParams(const std::string & config_file)
   src_img_w = model_config["data_config"]["src_size"][1].as<int>();
   input_img_h = model_config["data_config"]["input_size"][0].as<int>();
   input_img_w = model_config["data_config"]["input_size"][1].as<int>();
+  resize_radio = model_config["data_config"]["resize_radio"].as<float>();
   crop_h = model_config["data_config"]["crop"][0].as<int>();
   crop_w = model_config["data_config"]["crop"][1].as<int>();
   mean[0] = model_config["mean"][0].as<float>();
@@ -200,7 +201,6 @@ void BEVDet::InitParams(const std::string & config_file)
     out_num_task_head[common_head_name[i].as<std::string>()] = common_head_channel[i].as<int>();
   }
 
-  resize_radio = (float)input_img_w / src_img_w;
   feat_h = input_img_h / down_sample;
   feat_w = input_img_w / down_sample;
   depth_num = (depth_end - depth_start) / depth_step;
@@ -226,7 +226,7 @@ void BEVDet::InitParams(const std::string & config_file)
 void BEVDet::MallocDeviceMemory()
 {
   trt_buffer_sizes.resize(trt_engine->getNbBindings());
-  trt_buffer_dev = (void **)new void *[trt_engine->getNbBindings()];
+  trt_buffer_dev = reinterpret_cast<void **>(new void *[trt_engine->getNbBindings()]);
   for (int i = 0; i < trt_engine->getNbBindings(); i++) {
     nvinfer1::Dims32 dim = trt_context->getBindingDimensions(i);
     size_t size = 1;
@@ -240,7 +240,7 @@ void BEVDet::MallocDeviceMemory()
 
   std::cout << "img num binding : " << trt_engine->getNbBindings() << std::endl;
 
-  post_buffer = (void **)new void *[class_num_pre_task.size() * 6];
+  post_buffer = reinterpret_cast<void **>(new void *[class_num_pre_task.size() * 6]);
   for (size_t i = 0; i < class_num_pre_task.size(); i++) {
     post_buffer[i * 6 + 0] = trt_buffer_dev[buffer_map["reg_" + std::to_string(i)]];
     post_buffer[i * 6 + 1] = trt_buffer_dev[buffer_map["height_" + std::to_string(i)]];
@@ -266,9 +266,9 @@ void BEVDet::InitViewTransformer(
       for (int h_ = 0; h_ < feat_h; h_++) {
         for (int w_ = 0; w_ < feat_w; w_++) {
           int offset = i * depth_num * feat_h * feat_w + d_ * feat_h * feat_w + h_ * feat_w + w_;
-          (frustum + offset)->x() = (float)w_ * (input_img_w - 1) / (feat_w - 1);
-          (frustum + offset)->y() = (float)h_ * (input_img_h - 1) / (feat_h - 1);
-          (frustum + offset)->z() = (float)d_ * depth_step + depth_start;
+          (frustum + offset)->x() = static_cast<float>(w_) * (input_img_w - 1) / (feat_w - 1);
+          (frustum + offset)->y() = static_cast<float>(h_) * (input_img_h - 1) / (feat_h - 1);
+          (frustum + offset)->z() = static_cast<float>(d_) * depth_step + depth_start;
 
           // eliminate post transformation
           *(frustum + offset) -= post_trans.translation();
@@ -282,9 +282,9 @@ void BEVDet::InitViewTransformer(
 
           // voxelization
           *(frustum + offset) -= Eigen::Vector3f(x_start, y_start, z_start);
-          (frustum + offset)->x() = (int)((frustum + offset)->x() / x_step);
-          (frustum + offset)->y() = (int)((frustum + offset)->y() / y_step);
-          (frustum + offset)->z() = (int)((frustum + offset)->z() / z_step);
+          (frustum + offset)->x() = static_cast<int>((frustum + offset)->x() / x_step);
+          (frustum + offset)->y() = static_cast<int>((frustum + offset)->y() / y_step);
+          (frustum + offset)->z() = static_cast<int>((frustum + offset)->z() / z_step);
         }
       }
     }
@@ -308,9 +308,9 @@ void BEVDet::InitViewTransformer(
   std::vector<int> kept;
   for (int i = 0; i < num_points; i++) {
     if (
-      (int)(frustum + i)->x() >= 0 && (int)(frustum + i)->x() < xgrid_num &&
-      (int)(frustum + i)->y() >= 0 && (int)(frustum + i)->y() < ygrid_num &&
-      (int)(frustum + i)->z() >= 0 && (int)(frustum + i)->z() < zgrid_num) {
+      static_cast<int>((frustum + i)->x()) >= 0 && static_cast<int>((frustum + i)->x()) < xgrid_num &&
+      static_cast<int>((frustum + i)->y()) >= 0 && static_cast<int>((frustum + i)->y()) < ygrid_num &&
+      static_cast<int>((frustum + i)->z()) >= 0 && static_cast<int>((frustum + i)->z()) < zgrid_num) {
       kept.push_back(i);
     }
   }
@@ -323,7 +323,7 @@ void BEVDet::InitViewTransformer(
 
   for (int i = 0; i < valid_feat_num; i++) {
     Eigen::Vector3f & p = frustum[kept[i]];
-    ranks_bev_host[i] = (int)p.z() * xgrid_num * ygrid_num + (int)p.y() * xgrid_num + (int)p.x();
+    ranks_bev_host[i] = static_cast<int>(p.z()) * xgrid_num * ygrid_num + static_cast<int>(p.y()) * xgrid_num + static_cast<int>(p.x());
     order[i] = i;
   }
 
@@ -436,7 +436,7 @@ void save_tensor(size_t size, const void * ptr, const std::string & file)
   float * tensor = new float[size];
   CHECK_CUDA(cudaMemcpy(tensor, ptr, size * sizeof(float), cudaMemcpyDeviceToHost));
   std::ofstream out(file, std::ios::out | std::ios::binary);
-  out.write((char *)tensor, size * sizeof(float));
+  out.write(reinterpret_cast<char *>(tensor), size * sizeof(float));
   out.close();
   delete[] tensor;
 }
@@ -463,7 +463,7 @@ int BEVDet::DeserializeTRTEngine(
 
   engine_stream.seekg(0, std::ios::beg);
   void * engine_str = malloc(engine_size);
-  engine_stream.read((char *)engine_str, engine_size);
+  engine_stream.read(reinterpret_cast<char *>(engine_str), engine_size);
 
   nvinfer1::ICudaEngine * engine = runtime->deserializeCudaEngine(engine_str, engine_size, NULL);
   if (engine == nullptr) {
@@ -499,7 +499,7 @@ void BEVDet::GetAdjBEVFeature(
     size_t buf_size = trt_buffer_sizes[buffer_map["adj_feats"]] / adj_num;
 
     CHECK_CUDA(cudaMemcpy(
-      (char *)trt_buffer_dev[buffer_map["adj_feats"]] + i * buf_size, adj_buffer, buf_size,
+      reinterpret_cast<char *>(trt_buffer_dev[buffer_map["adj_feats"]]) + i * buf_size, adj_buffer, buf_size,
       cudaMemcpyDeviceToDevice));
 
     Eigen::Quaternion<float> adj_ego2global_rot;
@@ -508,7 +508,7 @@ void BEVDet::GetAdjBEVFeature(
 
     GetCurr2AdjTransform(
       ego2global_rot, adj_ego2global_rot, ego2global_trans, adj_ego2global_trans,
-      (float *)trt_buffer_dev[buffer_map["transforms"]] + i * transform_size);
+      reinterpret_cast<float *>(trt_buffer_dev[buffer_map["transforms"]]) + i * transform_size);
   }
   CHECK_CUDA(cudaMemcpy(
     trt_buffer_dev[buffer_map["flag"]], &flag, trt_buffer_sizes[buffer_map["flag"]],
@@ -577,10 +577,6 @@ int BEVDet::DoInfer(
 {
   printf("-------------------%d-------------------\n", idx + 1);
 
-  printf(
-    "scenes_token : %s, timestamp : %lld\n", cam_data.param.scene_token.data(),
-    cam_data.param.timestamp);
-
   auto start = high_resolution_clock::now();
   CHECK_CUDA(cudaMemcpy(
     trt_buffer_dev[buffer_map["images"]], cam_data.imgs_dev, trt_buffer_sizes[buffer_map["images"]],
@@ -650,11 +646,11 @@ void BEVDet::ExportEngine(const std::string & onnxFile, const std::string & trtF
 
   nvonnxparser::IParser * parser = nvonnxparser::createParser(*network, g_logger);
 
-  if (!parser->parseFromFile(onnxFile.c_str(), int(g_logger.reportable_severity))) {
+  if (!parser->parseFromFile(onnxFile.c_str(), static_cast<int>(g_logger.reportable_severity))) {
     std::cout << std::string("Failed parsing .onnx file!") << std::endl;
     for (int i = 0; i < parser->getNbErrors(); ++i) {
       auto * error = parser->getError(i);
-      std::cout << std::to_string(int(error->code())) << std::string(":")
+      std::cout << std::to_string(static_cast<int>(error->code())) << std::string(":")
                 << std::string(error->desc()) << std::endl;
     }
 
diff --git a/perception/tensorrt_bevdet/src/bevdet_node.cpp b/perception/tensorrt_bevdet/src/bevdet_node.cpp
index 182dad76645b2..5887e0d17b7fb 100644
--- a/perception/tensorrt_bevdet/src/bevdet_node.cpp
+++ b/perception/tensorrt_bevdet/src/bevdet_node.cpp
@@ -160,7 +160,7 @@ TRTBEVDetNode::TRTBEVDetNode(
 
   RCLCPP_INFO_STREAM(this->get_logger(), "Successful create bevdet!");
 
-  CHECK_CUDA(cudaMalloc((void **)&imgs_dev_, img_N_ * 3 * img_w_ * img_h_ * sizeof(uchar)));
+  CHECK_CUDA(cudaMalloc(reinterpret_cast<void **>(&imgs_dev_), img_N_ * 3 * img_w_ * img_h_ * sizeof(uchar)));
 
   pub_cloud_ = this->create_publisher<sensor_msgs::msg::PointCloud2>(
     "~/output/painting_cloud", rclcpp::SensorDataQoS());
diff --git a/perception/tensorrt_bevdet/src/cpu_jpegdecoder.cpp b/perception/tensorrt_bevdet/src/cpu_jpegdecoder.cpp
index e5443a7e33869..a5b37213f98a6 100644
--- a/perception/tensorrt_bevdet/src/cpu_jpegdecoder.cpp
+++ b/perception/tensorrt_bevdet/src/cpu_jpegdecoder.cpp
@@ -36,7 +36,7 @@ int decode_jpeg(const std::vector<char> & buffer, uchar * output)
   cinfo.err = jpeg_std_error(&jerr.pub);
 
   jerr.pub.error_exit = [](j_common_ptr cinfo) {
-    jpeg_error_handler * myerr = (jpeg_error_handler *)cinfo->err;
+    jpeg_error_handler * myerr = reinterpret_cast<jpeg_error_handler *>(cinfo->err);
     longjmp(myerr->setjmp_buffer, 1);
   };
 
@@ -53,7 +53,7 @@ int decode_jpeg(const std::vector<char> & buffer, uchar * output)
     printf("buffer size is 0");
     return EXIT_FAILURE;
   }
-  jpeg_mem_src(&cinfo, (uchar *)buffer.data(), buffer.size());
+  jpeg_mem_src(&cinfo, reinterpret_cast<const unsigned char *>(buffer.data()), buffer.size());
   if (jpeg_read_header(&cinfo, TRUE) != JPEG_HEADER_OK) {
     printf("\033[31mFailed to read jpeg header\033[0m\n");
     return EXIT_FAILURE;
diff --git a/perception/tensorrt_bevdet/src/data.cpp b/perception/tensorrt_bevdet/src/data.cpp
index fa4188488d04c..234d5537a1ab3 100644
--- a/perception/tensorrt_bevdet/src/data.cpp
+++ b/perception/tensorrt_bevdet/src/data.cpp
@@ -28,7 +28,7 @@ using std::chrono::high_resolution_clock;
 camParams::camParams(const YAML::Node & config, int n, std::vector<std::string> & cams_name)
 : N_img(n)
 {
-  if ((size_t)n != cams_name.size()) {
+  if (static_cast<size_t>(n) != cams_name.size()) {
     std::cerr << "Error! Need " << n << " camera param, bug given " << cams_name.size()
               << " camera names!" << std::endl;
   }
@@ -38,7 +38,7 @@ camParams::camParams(const YAML::Node & config, int n, std::vector<std::string>
   lidar2ego_rot = fromYamlQuater(config["lidar2ego_rotation"]);
   lidar2ego_trans = fromYamlTrans(config["lidar2ego_translation"]);
 
-  timestamp = config["timestamp"].as<unsigned long long>();
+  timestamp = config["timestamp"].as<std::int64_t>();
   scene_token = config["scene_token"].as<std::string>();
 
   imgs_file.clear();
@@ -106,7 +106,7 @@ DataLoader::DataLoader(
     lidar2ego_trans = fromYamlTrans(config0["lidar2ego_translation"]);
   }
 
-  CHECK_CUDA(cudaMalloc((void **)&imgs_dev, n_img * img_h * img_w * 3 * sizeof(uchar)));
+  CHECK_CUDA(cudaMalloc(reinterpret_cast<void **>(&imgs_dev), n_img * img_h * img_w * 3 * sizeof(uchar)));
 }
 
 const camsData & DataLoader::data(int idx, bool time_order)
@@ -153,7 +153,7 @@ int read_image(std::string & image_names, std::vector<char> & raw_data)
 
   std::streamsize file_size = input.tellg();
   input.seekg(0, std::ios::beg);
-  if (raw_data.size() < (size_t)file_size) {
+  if (raw_data.size() < static_cast<size_t>(file_size)) {
     raw_data.resize(file_size);
   }
   if (!input.read(raw_data.data(), file_size)) {
diff --git a/perception/tensorrt_bevdet/src/iou3d_nms.cu b/perception/tensorrt_bevdet/src/iou3d_nms.cu
index 2c0f214f9d823..042f8f3769b2b 100644
--- a/perception/tensorrt_bevdet/src/iou3d_nms.cu
+++ b/perception/tensorrt_bevdet/src/iou3d_nms.cu
@@ -314,7 +314,7 @@ __global__ void boxes_iou_bev_kernel(
 
 __global__ void RotatedNmsKernel(
   const int boxes_num, const float nms_overlap_thresh, const float * boxes,
-  unsigned long long * mask)
+  std::int64_t * mask)
 {
   // params: boxes (N, 7) [x, y, z, dx, dy, dz, heading]
   // params: mask (N, N/THREADS_PER_BLOCK_NMS)
@@ -352,7 +352,7 @@ __global__ void RotatedNmsKernel(
     const float * cur_box = boxes + cur_box_idx * kBoxBlockSize;
 
     int i = 0;
-    unsigned long long t = 0;
+    std::int64_t t = 0;
     int start = 0;
     if (row_start == col_start) {
       start = threadIdx.x + 1;
@@ -369,7 +369,7 @@ __global__ void RotatedNmsKernel(
 
 __global__ void NmsKernel(
   const int boxes_num, const float nms_overlap_thresh, const float * boxes,
-  unsigned long long * mask)
+  std::int64_t * mask)
 {
   // params: boxes (N, 7) [x, y, z, dx, dy, dz, heading]
   // params: mask (N, N/THREADS_PER_BLOCK_NMS)
@@ -407,7 +407,7 @@ __global__ void NmsKernel(
     const float * cur_box = boxes + cur_box_idx * kBoxBlockSize;
 
     int i = 0;
-    unsigned long long t = 0;
+    std::int64_t t = 0;
     int start = 0;
     if (row_start == col_start) {
       start = threadIdx.x + 1;
@@ -424,7 +424,7 @@ __global__ void NmsKernel(
 
 __global__ void RotatedNmsWithIndicesKernel(
   const int boxes_num, const float nms_overlap_thresh, const float * res_box,
-  const int * res_sorted_indices, unsigned long long * mask)
+  const int * res_sorted_indices, std::int64_t * mask)
 {
   const int row_start = blockIdx.y;
   const int col_start = blockIdx.x;
@@ -460,7 +460,7 @@ __global__ void RotatedNmsWithIndicesKernel(
     cur_box[6] = res_box[row_actual_idx * kBoxBlockSize + 6];
 
     int i = 0;
-    unsigned long long t = 0;
+    std::int64_t t = 0;
     int start = 0;
     if (row_start == col_start) {
       start = threadIdx.x + 1;  // Isn't it right now to compare with [0~threadIdx. x)? FIXME
@@ -565,7 +565,7 @@ void boxes_iou_bev_launcher(
 }
 
 void nms_launcher(
-  const float * boxes, unsigned long long * mask, int boxes_num, float nms_overlap_thresh)
+  const float * boxes, std::int64_t * mask, int boxes_num, float nms_overlap_thresh)
 {
   dim3 blocks(DIVUP(boxes_num, THREADS_PER_BLOCK_NMS), DIVUP(boxes_num, THREADS_PER_BLOCK_NMS));
   dim3 threads(THREADS_PER_BLOCK_NMS);
@@ -574,7 +574,7 @@ void nms_launcher(
 }
 
 void nms_normal_launcher(
-  const float * boxes, unsigned long long * mask, int boxes_num, float nms_overlap_thresh)
+  const float * boxes, std::int64_t * mask, int boxes_num, float nms_overlap_thresh)
 {
   dim3 blocks(DIVUP(boxes_num, THREADS_PER_BLOCK_NMS), DIVUP(boxes_num, THREADS_PER_BLOCK_NMS));
   dim3 threads(THREADS_PER_BLOCK_NMS);
@@ -589,26 +589,26 @@ Iou3dNmsCuda::Iou3dNmsCuda(
 
 int Iou3dNmsCuda::DoIou3dNms(
   const int box_num_pre, const float * res_box, const int * res_sorted_indices,
-  long * host_keep_data)
+  std::int64_t * host_keep_data)
 {
   const int col_blocks = DIVUP(box_num_pre, THREADS_PER_BLOCK_NMS);  // How many blocks are divided
   // printf("box_num_pre=%d, col_blocks=%d\n", box_num_pre, col_blocks);
-  unsigned long long * dev_mask = NULL;
-  cudaMalloc((void **)&dev_mask, box_num_pre * col_blocks * sizeof(unsigned long long));  //
+  std::int64_t * dev_mask = NULL;
+  cudaMalloc((void **)&dev_mask, box_num_pre * col_blocks * sizeof(std::int64_t));  //
   // THREADS_PER_BLOCK_NMS 掩码的长度
   dim3 blocks(DIVUP(box_num_pre, THREADS_PER_BLOCK_NMS), DIVUP(box_num_pre, THREADS_PER_BLOCK_NMS));
   dim3 threads(THREADS_PER_BLOCK_NMS);
   RotatedNmsWithIndicesKernel<<<blocks, threads>>>(
     box_num_pre, nms_overlap_thresh_, res_box, res_sorted_indices, dev_mask);
 
-  unsigned long long host_mask[box_num_pre * col_blocks];
+  std::int64_t host_mask[box_num_pre * col_blocks];
   cudaMemcpy(
-    host_mask, dev_mask, box_num_pre * col_blocks * sizeof(unsigned long long),
+    host_mask, dev_mask, box_num_pre * col_blocks * sizeof(std::int64_t),
     cudaMemcpyDeviceToHost);
   cudaFree(dev_mask);
 
-  unsigned long long host_remv[col_blocks];
-  memset(host_remv, 0, col_blocks * sizeof(unsigned long long));
+  std::int64_t host_remv[col_blocks];
+  memset(host_remv, 0, col_blocks * sizeof(std::int64_t));
   int num_to_keep = 0;
   for (int i = 0; i < box_num_pre; i++) {
     int nblock = i / THREADS_PER_BLOCK_NMS;
diff --git a/perception/tensorrt_bevdet/src/nvjpegdecoder.cpp b/perception/tensorrt_bevdet/src/nvjpegdecoder.cpp
index cf344ebe8b38a..a22d7d252e985 100644
--- a/perception/tensorrt_bevdet/src/nvjpegdecoder.cpp
+++ b/perception/tensorrt_bevdet/src/nvjpegdecoder.cpp
@@ -22,22 +22,22 @@ using std::chrono::high_resolution_clock;
 
 int dev_malloc(void ** p, size_t s)
 {
-  return (int)cudaMalloc(p, s);
+  return static_cast<int>(cudaMalloc(p, s));
 }
 
 int dev_free(void * p)
 {
-  return (int)cudaFree(p);
+  return static_cast<int>(cudaFree(p));
 }
 
 int host_malloc(void ** p, size_t s, unsigned int f)
 {
-  return (int)cudaHostAlloc(p, s, f);
+  return static_cast<int>(cudaHostAlloc(p, s, f));
 }
 
 int host_free(void * p)
 {
-  return (int)cudaFreeHost(p);
+  return static_cast<int>(cudaFreeHost(p));
 }
 
 int prepare_buffers(
@@ -52,7 +52,7 @@ int prepare_buffers(
 
   for (size_t i = 0; i < file_data.size(); i++) {
     CHECK_NVJPEG(nvjpegGetImageInfo(
-      share_param.nvjpeg_handle, (uchar *)file_data[i].data(), file_data[i].size(), &channels,
+      share_param.nvjpeg_handle, reinterpret_cast<uchar *>(file_data[i].data()), file_data[i].size(), &channels,
       &subsampling, widths, heights));
 
     int mul = 1;
@@ -82,7 +82,7 @@ int prepare_buffers(
           if (ibuf[i].channel[c]) {
             CHECK_CUDA(cudaFree(ibuf[i].channel[c]));
           }
-          CHECK_CUDA(cudaMalloc((void **)&ibuf[i].channel[c], sz));
+          CHECK_CUDA(cudaMalloc(reinterpret_cast<void **>(&ibuf[i].channel[c]), sz));
           isz[i].pitch[c] = sz;
         }
       }
@@ -146,13 +146,13 @@ int get_img(
   size_t step = width * height;
 
   CHECK_CUDA(cudaMemcpy2D(
-    img + 0 * step, (size_t)width, d_chanR, (size_t)pitchR, width, height,
+    img + 0 * step, static_cast<size_t>(width), d_chanR, static_cast<size_t>(pitchR), width, height,
     cudaMemcpyDeviceToDevice));
   CHECK_CUDA(cudaMemcpy2D(
-    img + 1 * step, (size_t)width, d_chanG, (size_t)pitchG, width, height,
+    img + 1 * step, static_cast<size_t>(width), d_chanG, static_cast<size_t>(pitchG), width, height,
     cudaMemcpyDeviceToDevice));
   CHECK_CUDA(cudaMemcpy2D(
-    img + 2 * step, (size_t)width, d_chanB, (size_t)pitchB, width, height,
+    img + 2 * step, static_cast<size_t>(width), d_chanB, static_cast<size_t>(pitchB), width, height,
     cudaMemcpyDeviceToDevice));
 
   return EXIT_SUCCESS;
diff --git a/perception/tensorrt_bevdet/src/postprocess.cu b/perception/tensorrt_bevdet/src/postprocess.cu
index 8a237dfbcd098..b8ba6b3cadd1a 100644
--- a/perception/tensorrt_bevdet/src/postprocess.cu
+++ b/perception/tensorrt_bevdet/src/postprocess.cu
@@ -108,7 +108,7 @@ PostprocessGPU::PostprocessGPU(
   CHECK_CUDA(cudaMallocHost((void **)&score_host, nms_pre_maxnum * sizeof(float)));
   CHECK_CUDA(cudaMallocHost((void **)&cls_host, map_size * sizeof(float)));
   CHECK_CUDA(cudaMallocHost((void **)&sorted_indices_host, nms_pre_maxnum * sizeof(int)));
-  CHECK_CUDA(cudaMallocHost((void **)&keep_data_host, nms_pre_maxnum * sizeof(long)));
+  CHECK_CUDA(cudaMallocHost((void **)&keep_data_host, nms_pre_maxnum * sizeof(std::int64_t)));
 
   CHECK_CUDA(cudaMemcpy(
     nms_rescale_factor_dev, nms_rescale_factor.data(), class_num * sizeof(float),

From 35561b988223b3af72d637d5c101df01e34c4d50 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 31 Jul 2024 10:31:38 +0000
Subject: [PATCH 10/85] style(pre-commit): autofix

---
 perception/tensorrt_bevdet/include/bevdet.hpp    |  3 ++-
 perception/tensorrt_bevdet/include/iou3d_nms.hpp |  3 ++-
 perception/tensorrt_bevdet/src/bevdet.cpp        | 16 ++++++++++------
 perception/tensorrt_bevdet/src/bevdet_node.cpp   |  3 ++-
 perception/tensorrt_bevdet/src/data.cpp          |  3 ++-
 perception/tensorrt_bevdet/src/iou3d_nms.cu      | 12 ++++--------
 perception/tensorrt_bevdet/src/nvjpegdecoder.cpp |  4 ++--
 7 files changed, 24 insertions(+), 20 deletions(-)

diff --git a/perception/tensorrt_bevdet/include/bevdet.hpp b/perception/tensorrt_bevdet/include/bevdet.hpp
index 0f97035f7ca18..d47f5f7a90e31 100644
--- a/perception/tensorrt_bevdet/include/bevdet.hpp
+++ b/perception/tensorrt_bevdet/include/bevdet.hpp
@@ -68,7 +68,8 @@ class adjFrame
     while (iters--) {
       last = (last + 1) % n;
       CHECK_CUDA(cudaMemcpy(
-        reinterpret_cast<char *>(adj_buffer) + last * buf_size, curr_buffer, buf_size, cudaMemcpyDeviceToDevice));
+        reinterpret_cast<char *>(adj_buffer) + last * buf_size, curr_buffer, buf_size,
+        cudaMemcpyDeviceToDevice));
       scenes_token[last] = curr_token;
       ego2global_rot[last] = _ego2global_rot;
       ego2global_trans[last] = _ego2global_trans;
diff --git a/perception/tensorrt_bevdet/include/iou3d_nms.hpp b/perception/tensorrt_bevdet/include/iou3d_nms.hpp
index c5c8870c4b457..1a2cb37b4a5a8 100644
--- a/perception/tensorrt_bevdet/include/iou3d_nms.hpp
+++ b/perception/tensorrt_bevdet/include/iou3d_nms.hpp
@@ -20,6 +20,8 @@ All Rights Reserved 2019-2022.
 #ifndef IOU3D_NMS_HPP_
 #define IOU3D_NMS_HPP_
 
+#include "common.hpp"
+
 #include <stdio.h>
 #include <thrust/count.h>
 #include <thrust/device_vector.h>
@@ -31,7 +33,6 @@ All Rights Reserved 2019-2022.
 
 #include <iostream>
 #include <vector>
-#include "common.hpp"
 
 const int THREADS_PER_BLOCK = 16;
 const int THREADS_PER_BLOCK_NMS = sizeof(std::int64_t) * 8;
diff --git a/perception/tensorrt_bevdet/src/bevdet.cpp b/perception/tensorrt_bevdet/src/bevdet.cpp
index de99bc6c0b1b4..2cf249b12fdfe 100644
--- a/perception/tensorrt_bevdet/src/bevdet.cpp
+++ b/perception/tensorrt_bevdet/src/bevdet.cpp
@@ -308,9 +308,12 @@ void BEVDet::InitViewTransformer(
   std::vector<int> kept;
   for (int i = 0; i < num_points; i++) {
     if (
-      static_cast<int>((frustum + i)->x()) >= 0 && static_cast<int>((frustum + i)->x()) < xgrid_num &&
-      static_cast<int>((frustum + i)->y()) >= 0 && static_cast<int>((frustum + i)->y()) < ygrid_num &&
-      static_cast<int>((frustum + i)->z()) >= 0 && static_cast<int>((frustum + i)->z()) < zgrid_num) {
+      static_cast<int>((frustum + i)->x()) >= 0 &&
+      static_cast<int>((frustum + i)->x()) < xgrid_num &&
+      static_cast<int>((frustum + i)->y()) >= 0 &&
+      static_cast<int>((frustum + i)->y()) < ygrid_num &&
+      static_cast<int>((frustum + i)->z()) >= 0 &&
+      static_cast<int>((frustum + i)->z()) < zgrid_num) {
       kept.push_back(i);
     }
   }
@@ -323,7 +326,8 @@ void BEVDet::InitViewTransformer(
 
   for (int i = 0; i < valid_feat_num; i++) {
     Eigen::Vector3f & p = frustum[kept[i]];
-    ranks_bev_host[i] = static_cast<int>(p.z()) * xgrid_num * ygrid_num + static_cast<int>(p.y()) * xgrid_num + static_cast<int>(p.x());
+    ranks_bev_host[i] = static_cast<int>(p.z()) * xgrid_num * ygrid_num +
+                        static_cast<int>(p.y()) * xgrid_num + static_cast<int>(p.x());
     order[i] = i;
   }
 
@@ -499,8 +503,8 @@ void BEVDet::GetAdjBEVFeature(
     size_t buf_size = trt_buffer_sizes[buffer_map["adj_feats"]] / adj_num;
 
     CHECK_CUDA(cudaMemcpy(
-      reinterpret_cast<char *>(trt_buffer_dev[buffer_map["adj_feats"]]) + i * buf_size, adj_buffer, buf_size,
-      cudaMemcpyDeviceToDevice));
+      reinterpret_cast<char *>(trt_buffer_dev[buffer_map["adj_feats"]]) + i * buf_size, adj_buffer,
+      buf_size, cudaMemcpyDeviceToDevice));
 
     Eigen::Quaternion<float> adj_ego2global_rot;
     Eigen::Translation3f adj_ego2global_trans;
diff --git a/perception/tensorrt_bevdet/src/bevdet_node.cpp b/perception/tensorrt_bevdet/src/bevdet_node.cpp
index 5887e0d17b7fb..bc293a4081a0a 100644
--- a/perception/tensorrt_bevdet/src/bevdet_node.cpp
+++ b/perception/tensorrt_bevdet/src/bevdet_node.cpp
@@ -160,7 +160,8 @@ TRTBEVDetNode::TRTBEVDetNode(
 
   RCLCPP_INFO_STREAM(this->get_logger(), "Successful create bevdet!");
 
-  CHECK_CUDA(cudaMalloc(reinterpret_cast<void **>(&imgs_dev_), img_N_ * 3 * img_w_ * img_h_ * sizeof(uchar)));
+  CHECK_CUDA(cudaMalloc(
+    reinterpret_cast<void **>(&imgs_dev_), img_N_ * 3 * img_w_ * img_h_ * sizeof(uchar)));
 
   pub_cloud_ = this->create_publisher<sensor_msgs::msg::PointCloud2>(
     "~/output/painting_cloud", rclcpp::SensorDataQoS());
diff --git a/perception/tensorrt_bevdet/src/data.cpp b/perception/tensorrt_bevdet/src/data.cpp
index 234d5537a1ab3..9bab173898d97 100644
--- a/perception/tensorrt_bevdet/src/data.cpp
+++ b/perception/tensorrt_bevdet/src/data.cpp
@@ -106,7 +106,8 @@ DataLoader::DataLoader(
     lidar2ego_trans = fromYamlTrans(config0["lidar2ego_translation"]);
   }
 
-  CHECK_CUDA(cudaMalloc(reinterpret_cast<void **>(&imgs_dev), n_img * img_h * img_w * 3 * sizeof(uchar)));
+  CHECK_CUDA(
+    cudaMalloc(reinterpret_cast<void **>(&imgs_dev), n_img * img_h * img_w * 3 * sizeof(uchar)));
 }
 
 const camsData & DataLoader::data(int idx, bool time_order)
diff --git a/perception/tensorrt_bevdet/src/iou3d_nms.cu b/perception/tensorrt_bevdet/src/iou3d_nms.cu
index 042f8f3769b2b..1fbf2503be528 100644
--- a/perception/tensorrt_bevdet/src/iou3d_nms.cu
+++ b/perception/tensorrt_bevdet/src/iou3d_nms.cu
@@ -313,8 +313,7 @@ __global__ void boxes_iou_bev_kernel(
 }
 
 __global__ void RotatedNmsKernel(
-  const int boxes_num, const float nms_overlap_thresh, const float * boxes,
-  std::int64_t * mask)
+  const int boxes_num, const float nms_overlap_thresh, const float * boxes, std::int64_t * mask)
 {
   // params: boxes (N, 7) [x, y, z, dx, dy, dz, heading]
   // params: mask (N, N/THREADS_PER_BLOCK_NMS)
@@ -368,8 +367,7 @@ __global__ void RotatedNmsKernel(
 }
 
 __global__ void NmsKernel(
-  const int boxes_num, const float nms_overlap_thresh, const float * boxes,
-  std::int64_t * mask)
+  const int boxes_num, const float nms_overlap_thresh, const float * boxes, std::int64_t * mask)
 {
   // params: boxes (N, 7) [x, y, z, dx, dy, dz, heading]
   // params: mask (N, N/THREADS_PER_BLOCK_NMS)
@@ -564,8 +562,7 @@ void boxes_iou_bev_launcher(
 #endif
 }
 
-void nms_launcher(
-  const float * boxes, std::int64_t * mask, int boxes_num, float nms_overlap_thresh)
+void nms_launcher(const float * boxes, std::int64_t * mask, int boxes_num, float nms_overlap_thresh)
 {
   dim3 blocks(DIVUP(boxes_num, THREADS_PER_BLOCK_NMS), DIVUP(boxes_num, THREADS_PER_BLOCK_NMS));
   dim3 threads(THREADS_PER_BLOCK_NMS);
@@ -603,8 +600,7 @@ int Iou3dNmsCuda::DoIou3dNms(
 
   std::int64_t host_mask[box_num_pre * col_blocks];
   cudaMemcpy(
-    host_mask, dev_mask, box_num_pre * col_blocks * sizeof(std::int64_t),
-    cudaMemcpyDeviceToHost);
+    host_mask, dev_mask, box_num_pre * col_blocks * sizeof(std::int64_t), cudaMemcpyDeviceToHost);
   cudaFree(dev_mask);
 
   std::int64_t host_remv[col_blocks];
diff --git a/perception/tensorrt_bevdet/src/nvjpegdecoder.cpp b/perception/tensorrt_bevdet/src/nvjpegdecoder.cpp
index a22d7d252e985..f017d3a40ca76 100644
--- a/perception/tensorrt_bevdet/src/nvjpegdecoder.cpp
+++ b/perception/tensorrt_bevdet/src/nvjpegdecoder.cpp
@@ -52,8 +52,8 @@ int prepare_buffers(
 
   for (size_t i = 0; i < file_data.size(); i++) {
     CHECK_NVJPEG(nvjpegGetImageInfo(
-      share_param.nvjpeg_handle, reinterpret_cast<uchar *>(file_data[i].data()), file_data[i].size(), &channels,
-      &subsampling, widths, heights));
+      share_param.nvjpeg_handle, reinterpret_cast<uchar *>(file_data[i].data()),
+      file_data[i].size(), &channels, &subsampling, widths, heights));
 
     int mul = 1;
     // in the case of interleaved RGB output, write only to single channel, but

From 71712bea2de28d9896bc986d9faadd02dd0afaa7 Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Wed, 31 Jul 2024 19:51:45 +0800
Subject: [PATCH 11/85] feat(tensorrt_bevdet): add new 3d object detection
 method

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 .../include/alignbev_plugin.hpp               |  2 +-
 perception/tensorrt_bevdet/include/bevdet.hpp |  1 +
 .../tensorrt_bevdet/include/bevdet_node.hpp   |  2 +
 perception/tensorrt_bevdet/include/data.hpp   | 49 +----------
 .../tensorrt_bevdet/include/postprocess.hpp   |  1 +
 perception/tensorrt_bevdet/src/data.cpp       | 84 -------------------
 .../tensorrt_bevdet/src/nvjpegdecoder.cpp     |  1 +
 7 files changed, 7 insertions(+), 133 deletions(-)

diff --git a/perception/tensorrt_bevdet/include/alignbev_plugin.hpp b/perception/tensorrt_bevdet/include/alignbev_plugin.hpp
index 94352c8ee3ba6..be249700b6516 100644
--- a/perception/tensorrt_bevdet/include/alignbev_plugin.hpp
+++ b/perception/tensorrt_bevdet/include/alignbev_plugin.hpp
@@ -43,7 +43,7 @@ class AlignBEVPlugin : public IPluginV2DynamicExt
 
 public:
   AlignBEVPlugin() = delete;
-  AlignBEVPlugin(const std::string & name);
+  explicit AlignBEVPlugin(const std::string & name);
   AlignBEVPlugin(const std::string & name, const void * buffer, size_t length);
   ~AlignBEVPlugin();
 
diff --git a/perception/tensorrt_bevdet/include/bevdet.hpp b/perception/tensorrt_bevdet/include/bevdet.hpp
index 0f97035f7ca18..d181a596e0532 100644
--- a/perception/tensorrt_bevdet/include/bevdet.hpp
+++ b/perception/tensorrt_bevdet/include/bevdet.hpp
@@ -29,6 +29,7 @@
 #include <memory>
 #include <string>
 #include <vector>
+#include <algorithm>
 
 class adjFrame
 {
diff --git a/perception/tensorrt_bevdet/include/bevdet_node.hpp b/perception/tensorrt_bevdet/include/bevdet_node.hpp
index 64d6953b807d0..0e1428849ae4b 100644
--- a/perception/tensorrt_bevdet/include/bevdet_node.hpp
+++ b/perception/tensorrt_bevdet/include/bevdet_node.hpp
@@ -48,6 +48,8 @@
 #include <map>
 #include <string>
 #include <thread>
+#include <vector>
+#include <memory>
 
 typedef pcl::PointXYZI PointT;
 
diff --git a/perception/tensorrt_bevdet/include/data.hpp b/perception/tensorrt_bevdet/include/data.hpp
index 0dbf95ef3e124..341104a4d30ad 100644
--- a/perception/tensorrt_bevdet/include/data.hpp
+++ b/perception/tensorrt_bevdet/include/data.hpp
@@ -23,6 +23,7 @@
 #include <yaml-cpp/yaml.h>
 
 #include <vector>
+#include <string> 
 
 struct camParams
 {
@@ -55,54 +56,6 @@ struct camsData
   uchar * imgs_dev;
 };
 
-class DataLoader
-{
-public:
-  DataLoader() = default;
-  DataLoader(
-    int _n_img, int _h, int _w, const std::string & _data_infos_path,
-    const std::vector<std::string> & _cams_name, bool _sep = true);
-
-  const std::vector<Eigen::Matrix3f> & get_cams_intrin() const { return cams_intrin; }
-  const std::vector<Eigen::Quaternion<float>> & get_cams2ego_rot() const { return cams2ego_rot; }
-  const std::vector<Eigen::Translation3f> & get_cams2ego_trans() const { return cams2ego_trans; }
-
-  const Eigen::Quaternion<float> & get_lidar2ego_rot() const { return lidar2ego_rot; }
-
-  const Eigen::Translation3f & get_lidar2ego_trans() const { return lidar2ego_trans; }
-
-  int size() { return sample_num; }
-
-  const camsData & data(int idx, bool time_order = true);
-  ~DataLoader();
-
-private:
-  std::vector<int> time_sequence;
-  std::string data_infos_path;
-  int sample_num;
-
-  std::vector<std::string> cams_name;
-  int n_img;
-  int img_h;
-  int img_w;
-
-  std::vector<camParams> cams_param;
-  camsData cams_data;
-
-  std::vector<Eigen::Matrix3f> cams_intrin;
-  std::vector<Eigen::Quaternion<float>> cams2ego_rot;
-  std::vector<Eigen::Translation3f> cams2ego_trans;
-  Eigen::Quaternion<float> lidar2ego_rot;
-  Eigen::Translation3f lidar2ego_trans;
-
-#ifdef __HAVE_NVJPEG__
-  nvjpegDecoder nvdecoder;
-#endif
-  uchar * imgs_dev;
-  std::vector<std::vector<char>> imgs_data;
-  bool separate;
-};
-
 Eigen::Translation3f fromYamlTrans(YAML::Node x);
 Eigen::Quaternion<float> fromYamlQuater(YAML::Node x);
 Eigen::Matrix3f fromYamlMatrix3f(YAML::Node x);
diff --git a/perception/tensorrt_bevdet/include/postprocess.hpp b/perception/tensorrt_bevdet/include/postprocess.hpp
index 0d60dd77eaa07..12c4cf993379d 100644
--- a/perception/tensorrt_bevdet/include/postprocess.hpp
+++ b/perception/tensorrt_bevdet/include/postprocess.hpp
@@ -17,6 +17,7 @@
 #include "iou3d_nms.hpp"
 
 #include <vector>
+#include <memory>
 
 class PostprocessGPU
 {
diff --git a/perception/tensorrt_bevdet/src/data.cpp b/perception/tensorrt_bevdet/src/data.cpp
index 234d5537a1ab3..810ab7cc0e8b1 100644
--- a/perception/tensorrt_bevdet/src/data.cpp
+++ b/perception/tensorrt_bevdet/src/data.cpp
@@ -58,90 +58,6 @@ camParams::camParams(const YAML::Node & config, int n, std::vector<std::string>
   }
 }
 
-DataLoader::DataLoader(
-  int _n_img, int _h, int _w, const std::string & _data_infos_path,
-  const std::vector<std::string> & _cams_name, bool _sep)
-: n_img(_n_img),
-  cams_intrin(_n_img),
-  cams2ego_rot(_n_img),
-  cams2ego_trans(_n_img),
-#ifdef __HAVE_NVJPEG__
-  nvdecoder(_n_img, DECODE_BGR),
-#endif
-  img_h(_h),
-  img_w(_w),
-  cams_name(_cams_name),
-  data_infos_path(_data_infos_path),
-  separate(_sep)
-{
-  YAML::Node temp_seq = YAML::LoadFile(data_infos_path + "/time_sequence.yaml");
-  time_sequence = temp_seq["time_sequence"].as<std::vector<int>>();
-  sample_num = time_sequence.size();
-
-  cams_param.resize(sample_num);
-
-  if (separate == false) {
-    YAML::Node infos = YAML::LoadFile(data_infos_path + "/samples_info.yaml");
-
-    for (size_t i = 0; i < cams_name.size(); i++) {
-      cams_intrin[i] = fromYamlMatrix3f(infos[0]["cams"][cams_name[i]]["cam_intrinsic"]);
-      cams2ego_rot[i] = fromYamlQuater(infos[0]["cams"][cams_name[i]]["sensor2ego_rotation"]);
-      cams2ego_trans[i] = fromYamlTrans(infos[0]["cams"][cams_name[i]]["sensor2ego_translation"]);
-    }
-    lidar2ego_rot = fromYamlQuater(infos[0]["lidar2ego_rotation"]);
-    lidar2ego_trans = fromYamlTrans(infos[0]["lidar2ego_translation"]);
-
-    for (int i = 0; i < sample_num; i++) {
-      cams_param[i] = camParams(infos[i], n_img, cams_name);
-    }
-  } else {
-    YAML::Node config0 = YAML::LoadFile(data_infos_path + "/samples_info/sample0000.yaml");
-
-    for (size_t i = 0; i < cams_name.size(); i++) {
-      cams_intrin[i] = fromYamlMatrix3f(config0["cams"][cams_name[i]]["cam_intrinsic"]);
-      cams2ego_rot[i] = fromYamlQuater(config0["cams"][cams_name[i]]["sensor2ego_rotation"]);
-      cams2ego_trans[i] = fromYamlTrans(config0["cams"][cams_name[i]]["sensor2ego_translation"]);
-    }
-    lidar2ego_rot = fromYamlQuater(config0["lidar2ego_rotation"]);
-    lidar2ego_trans = fromYamlTrans(config0["lidar2ego_translation"]);
-  }
-
-  CHECK_CUDA(cudaMalloc(reinterpret_cast<void **>(&imgs_dev), n_img * img_h * img_w * 3 * sizeof(uchar)));
-}
-
-const camsData & DataLoader::data(int idx, bool time_order)
-{
-  if (time_order) {
-    idx = time_sequence[idx];
-  }
-  printf("------time_sequence idx : %d ---------\n", idx);
-  if (separate == false) {
-    cams_data.param = cams_param[idx];
-  } else {
-    char str_idx[50];
-    sprintf(str_idx, "/samples_info/sample%04d.yaml", idx);
-    YAML::Node config_idx = YAML::LoadFile(data_infos_path + str_idx);
-    cams_data.param = camParams(config_idx, n_img, cams_name);
-  }
-  imgs_data.clear();
-  if (read_sample(cams_data.param.imgs_file, imgs_data)) {
-    exit(1);
-  }
-#ifdef __HAVE_NVJPEG__
-  nvdecoder.decode(imgs_data, imgs_dev);
-#else
-  decode_cpu(imgs_data, imgs_dev, img_w, img_h);
-  printf("decode on cpu!\n");
-#endif
-  cams_data.imgs_dev = imgs_dev;
-  return cams_data;
-}
-
-DataLoader::~DataLoader()
-{
-  CHECK_CUDA(cudaFree(imgs_dev));
-}
-
 int read_image(std::string & image_names, std::vector<char> & raw_data)
 {
   std::ifstream input(image_names.c_str(), std::ios::in | std::ios::binary | std::ios::ate);
diff --git a/perception/tensorrt_bevdet/src/nvjpegdecoder.cpp b/perception/tensorrt_bevdet/src/nvjpegdecoder.cpp
index a22d7d252e985..264f19e4ef2fd 100644
--- a/perception/tensorrt_bevdet/src/nvjpegdecoder.cpp
+++ b/perception/tensorrt_bevdet/src/nvjpegdecoder.cpp
@@ -68,6 +68,7 @@ int prepare_buffers(
       widths[1] = widths[2] = widths[0];
       heights[1] = heights[2] = heights[0];
     }
+    else{}
 
     if (img_width[i] != widths[0] || img_height[i] != heights[0]) {
       img_width[i] = widths[0];

From cc1e3d8e833003a14d1953bd04f75292d748af51 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 31 Jul 2024 11:59:30 +0000
Subject: [PATCH 12/85] style(pre-commit): autofix

---
 perception/tensorrt_bevdet/include/bevdet.hpp      | 2 +-
 perception/tensorrt_bevdet/include/bevdet_node.hpp | 2 +-
 perception/tensorrt_bevdet/include/data.hpp        | 2 +-
 perception/tensorrt_bevdet/include/iou3d_nms.hpp   | 3 ++-
 perception/tensorrt_bevdet/include/postprocess.hpp | 2 +-
 perception/tensorrt_bevdet/src/nvjpegdecoder.cpp   | 2 +-
 6 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/perception/tensorrt_bevdet/include/bevdet.hpp b/perception/tensorrt_bevdet/include/bevdet.hpp
index 07cbe4189ca6b..3e1210f9b86e6 100644
--- a/perception/tensorrt_bevdet/include/bevdet.hpp
+++ b/perception/tensorrt_bevdet/include/bevdet.hpp
@@ -25,11 +25,11 @@
 #include <NvOnnxParser.h>
 #include <yaml-cpp/yaml.h>
 
+#include <algorithm>
 #include <map>
 #include <memory>
 #include <string>
 #include <vector>
-#include <algorithm>
 
 class adjFrame
 {
diff --git a/perception/tensorrt_bevdet/include/bevdet_node.hpp b/perception/tensorrt_bevdet/include/bevdet_node.hpp
index 0e1428849ae4b..5f7b945819918 100644
--- a/perception/tensorrt_bevdet/include/bevdet_node.hpp
+++ b/perception/tensorrt_bevdet/include/bevdet_node.hpp
@@ -46,10 +46,10 @@
 #include <fstream>
 #include <iostream>
 #include <map>
+#include <memory>
 #include <string>
 #include <thread>
 #include <vector>
-#include <memory>
 
 typedef pcl::PointXYZI PointT;
 
diff --git a/perception/tensorrt_bevdet/include/data.hpp b/perception/tensorrt_bevdet/include/data.hpp
index 341104a4d30ad..cc448245f969e 100644
--- a/perception/tensorrt_bevdet/include/data.hpp
+++ b/perception/tensorrt_bevdet/include/data.hpp
@@ -22,8 +22,8 @@
 
 #include <yaml-cpp/yaml.h>
 
+#include <string>
 #include <vector>
-#include <string> 
 
 struct camParams
 {
diff --git a/perception/tensorrt_bevdet/include/iou3d_nms.hpp b/perception/tensorrt_bevdet/include/iou3d_nms.hpp
index c5c8870c4b457..1a2cb37b4a5a8 100644
--- a/perception/tensorrt_bevdet/include/iou3d_nms.hpp
+++ b/perception/tensorrt_bevdet/include/iou3d_nms.hpp
@@ -20,6 +20,8 @@ All Rights Reserved 2019-2022.
 #ifndef IOU3D_NMS_HPP_
 #define IOU3D_NMS_HPP_
 
+#include "common.hpp"
+
 #include <stdio.h>
 #include <thrust/count.h>
 #include <thrust/device_vector.h>
@@ -31,7 +33,6 @@ All Rights Reserved 2019-2022.
 
 #include <iostream>
 #include <vector>
-#include "common.hpp"
 
 const int THREADS_PER_BLOCK = 16;
 const int THREADS_PER_BLOCK_NMS = sizeof(std::int64_t) * 8;
diff --git a/perception/tensorrt_bevdet/include/postprocess.hpp b/perception/tensorrt_bevdet/include/postprocess.hpp
index 12c4cf993379d..0262040d07e4b 100644
--- a/perception/tensorrt_bevdet/include/postprocess.hpp
+++ b/perception/tensorrt_bevdet/include/postprocess.hpp
@@ -16,8 +16,8 @@
 
 #include "iou3d_nms.hpp"
 
-#include <vector>
 #include <memory>
+#include <vector>
 
 class PostprocessGPU
 {
diff --git a/perception/tensorrt_bevdet/src/nvjpegdecoder.cpp b/perception/tensorrt_bevdet/src/nvjpegdecoder.cpp
index 1f5f734137111..0184583416ef8 100644
--- a/perception/tensorrt_bevdet/src/nvjpegdecoder.cpp
+++ b/perception/tensorrt_bevdet/src/nvjpegdecoder.cpp
@@ -67,8 +67,8 @@ int prepare_buffers(
       channels = 3;
       widths[1] = widths[2] = widths[0];
       heights[1] = heights[2] = heights[0];
+    } else {
     }
-    else{}
 
     if (img_width[i] != widths[0] || img_height[i] != heights[0]) {
       img_width[i] = widths[0];

From b2d7ef047ef810c9839060acda95d7bbfa9e3498 Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Wed, 31 Jul 2024 20:17:49 +0800
Subject: [PATCH 13/85] feat(tensorrt_bevdet): add new 3d object detection
 method

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 perception/tensorrt_bevdet/src/nvjpegdecoder.cpp | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/perception/tensorrt_bevdet/src/nvjpegdecoder.cpp b/perception/tensorrt_bevdet/src/nvjpegdecoder.cpp
index 0184583416ef8..bcc93e7a66358 100644
--- a/perception/tensorrt_bevdet/src/nvjpegdecoder.cpp
+++ b/perception/tensorrt_bevdet/src/nvjpegdecoder.cpp
@@ -61,14 +61,12 @@ int prepare_buffers(
     if (share_param.fmt == NVJPEG_OUTPUT_RGBI || share_param.fmt == NVJPEG_OUTPUT_BGRI) {
       channels = 1;
       mul = 3;
-    }
-    // in the case of rgb create 3 buffers with sizes of original image
-    else if (share_param.fmt == NVJPEG_OUTPUT_RGB || share_param.fmt == NVJPEG_OUTPUT_BGR) {
+    }else if (share_param.fmt == NVJPEG_OUTPUT_RGB || share_param.fmt == NVJPEG_OUTPUT_BGR) {
+      // in the case of rgb create 3 buffers with sizes of original image
       channels = 3;
       widths[1] = widths[2] = widths[0];
       heights[1] = heights[2] = heights[0];
-    } else {
-    }
+    } 
 
     if (img_width[i] != widths[0] || img_height[i] != heights[0]) {
       img_width[i] = widths[0];

From eda145298b300ad140f142a6bb97cc5cd7f2182d Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 31 Jul 2024 12:20:18 +0000
Subject: [PATCH 14/85] style(pre-commit): autofix

---
 perception/tensorrt_bevdet/src/nvjpegdecoder.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/perception/tensorrt_bevdet/src/nvjpegdecoder.cpp b/perception/tensorrt_bevdet/src/nvjpegdecoder.cpp
index bcc93e7a66358..a85256ba0c15a 100644
--- a/perception/tensorrt_bevdet/src/nvjpegdecoder.cpp
+++ b/perception/tensorrt_bevdet/src/nvjpegdecoder.cpp
@@ -61,12 +61,12 @@ int prepare_buffers(
     if (share_param.fmt == NVJPEG_OUTPUT_RGBI || share_param.fmt == NVJPEG_OUTPUT_BGRI) {
       channels = 1;
       mul = 3;
-    }else if (share_param.fmt == NVJPEG_OUTPUT_RGB || share_param.fmt == NVJPEG_OUTPUT_BGR) {
+    } else if (share_param.fmt == NVJPEG_OUTPUT_RGB || share_param.fmt == NVJPEG_OUTPUT_BGR) {
       // in the case of rgb create 3 buffers with sizes of original image
       channels = 3;
       widths[1] = widths[2] = widths[0];
       heights[1] = heights[2] = heights[0];
-    } 
+    }
 
     if (img_width[i] != widths[0] || img_height[i] != heights[0]) {
       img_width[i] = widths[0];

From b1d1162707ddc2c4caa88c917023c87a23be7a62 Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Thu, 1 Aug 2024 13:57:05 +0800
Subject: [PATCH 15/85] feat(tensorrt_bevdet): add new 3d object detection
 method

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 perception/tensorrt_bevdet/include/bevdet.hpp      | 2 +-
 perception/tensorrt_bevdet/include/common.hpp      | 2 +-
 perception/tensorrt_bevdet/include/postprocess.hpp | 1 +
 perception/tensorrt_bevdet/src/cpu_jpegdecoder.cpp | 2 +-
 perception/tensorrt_bevdet/src/preprocess.cu       | 1 -
 5 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/perception/tensorrt_bevdet/include/bevdet.hpp b/perception/tensorrt_bevdet/include/bevdet.hpp
index 3e1210f9b86e6..2150727d0b5da 100644
--- a/perception/tensorrt_bevdet/include/bevdet.hpp
+++ b/perception/tensorrt_bevdet/include/bevdet.hpp
@@ -14,10 +14,10 @@
 #ifndef BEVDET_HPP_
 #define BEVDET_HPP_
 
-#include "NvInfer.h"
 #include "common.hpp"
 #include "data.hpp"
 #include "postprocess.hpp"
+#include "NvInfer.h"
 
 #include <Eigen/Core>
 #include <Eigen/Geometry>
diff --git a/perception/tensorrt_bevdet/include/common.hpp b/perception/tensorrt_bevdet/include/common.hpp
index 5142600e7523b..f111f1c5c4e4a 100644
--- a/perception/tensorrt_bevdet/include/common.hpp
+++ b/perception/tensorrt_bevdet/include/common.hpp
@@ -16,10 +16,10 @@
 
 #include <NvInfer.h>
 #include <assert.h>
+#include <thrust/sort.h>
 #include <cuda_runtime.h>
 #include <stdio.h>
 #include <stdlib.h>
-
 #include <iostream>
 #include <string>
 
diff --git a/perception/tensorrt_bevdet/include/postprocess.hpp b/perception/tensorrt_bevdet/include/postprocess.hpp
index 0262040d07e4b..0b5e0e9d025ea 100644
--- a/perception/tensorrt_bevdet/include/postprocess.hpp
+++ b/perception/tensorrt_bevdet/include/postprocess.hpp
@@ -15,6 +15,7 @@
 #define POSTPROCESS_HPP_
 
 #include "iou3d_nms.hpp"
+#include "common.hpp"
 
 #include <memory>
 #include <vector>
diff --git a/perception/tensorrt_bevdet/src/cpu_jpegdecoder.cpp b/perception/tensorrt_bevdet/src/cpu_jpegdecoder.cpp
index a5b37213f98a6..c7ca8b063fd21 100644
--- a/perception/tensorrt_bevdet/src/cpu_jpegdecoder.cpp
+++ b/perception/tensorrt_bevdet/src/cpu_jpegdecoder.cpp
@@ -53,7 +53,7 @@ int decode_jpeg(const std::vector<char> & buffer, uchar * output)
     printf("buffer size is 0");
     return EXIT_FAILURE;
   }
-  jpeg_mem_src(&cinfo, reinterpret_cast<const unsigned char *>(buffer.data()), buffer.size());
+  jpeg_mem_src(&cinfo, reinterpret_cast<const uchar *>(buffer.data()), buffer.size());
   if (jpeg_read_header(&cinfo, TRUE) != JPEG_HEADER_OK) {
     printf("\033[31mFailed to read jpeg header\033[0m\n");
     return EXIT_FAILURE;
diff --git a/perception/tensorrt_bevdet/src/preprocess.cu b/perception/tensorrt_bevdet/src/preprocess.cu
index a457817d9601c..2c454403d38de 100644
--- a/perception/tensorrt_bevdet/src/preprocess.cu
+++ b/perception/tensorrt_bevdet/src/preprocess.cu
@@ -11,7 +11,6 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#include "common.hpp"
 #include "preprocess.hpp"
 
 #include <thrust/fill.h>

From 9ba248873aa81ad1e19c54333f11907403a8bc38 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 1 Aug 2024 05:59:24 +0000
Subject: [PATCH 16/85] style(pre-commit): autofix

---
 perception/tensorrt_bevdet/include/bevdet.hpp      | 2 +-
 perception/tensorrt_bevdet/include/common.hpp      | 3 ++-
 perception/tensorrt_bevdet/include/postprocess.hpp | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/perception/tensorrt_bevdet/include/bevdet.hpp b/perception/tensorrt_bevdet/include/bevdet.hpp
index 2150727d0b5da..3e1210f9b86e6 100644
--- a/perception/tensorrt_bevdet/include/bevdet.hpp
+++ b/perception/tensorrt_bevdet/include/bevdet.hpp
@@ -14,10 +14,10 @@
 #ifndef BEVDET_HPP_
 #define BEVDET_HPP_
 
+#include "NvInfer.h"
 #include "common.hpp"
 #include "data.hpp"
 #include "postprocess.hpp"
-#include "NvInfer.h"
 
 #include <Eigen/Core>
 #include <Eigen/Geometry>
diff --git a/perception/tensorrt_bevdet/include/common.hpp b/perception/tensorrt_bevdet/include/common.hpp
index f111f1c5c4e4a..28a1698a579e2 100644
--- a/perception/tensorrt_bevdet/include/common.hpp
+++ b/perception/tensorrt_bevdet/include/common.hpp
@@ -16,10 +16,11 @@
 
 #include <NvInfer.h>
 #include <assert.h>
-#include <thrust/sort.h>
 #include <cuda_runtime.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <thrust/sort.h>
+
 #include <iostream>
 #include <string>
 
diff --git a/perception/tensorrt_bevdet/include/postprocess.hpp b/perception/tensorrt_bevdet/include/postprocess.hpp
index 0b5e0e9d025ea..cb401239f5746 100644
--- a/perception/tensorrt_bevdet/include/postprocess.hpp
+++ b/perception/tensorrt_bevdet/include/postprocess.hpp
@@ -14,8 +14,8 @@
 #ifndef POSTPROCESS_HPP_
 #define POSTPROCESS_HPP_
 
-#include "iou3d_nms.hpp"
 #include "common.hpp"
+#include "iou3d_nms.hpp"
 
 #include <memory>
 #include <vector>

From 70066bc5c85e8baf2b325f4f5952bed02f3b6f94 Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Thu, 1 Aug 2024 15:58:31 +0800
Subject: [PATCH 17/85] feat(tensorrt_bevdet): add new 3d object detection
 method

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 perception/tensorrt_bevdet/README.md | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/perception/tensorrt_bevdet/README.md b/perception/tensorrt_bevdet/README.md
index a80dcc7a9ac7d..4306e9297c3d6 100644
--- a/perception/tensorrt_bevdet/README.md
+++ b/perception/tensorrt_bevdet/README.md
@@ -33,8 +33,19 @@ BEVDet is a BEV perception algorithm based on panoramic cameras. It unifies mult
 
 The model is trained on open-source dataset `NuScenes` and has poor generalization on its own dataset, If you want to use this model to infer your data, you need to retrain it.
 
-You can traine models by refer below links.
+## Trained Models
 
-[BEVDet](https://github.com/HuangJunJie2017/BEVDet/tree/dev2.1)
+You can download the onnx format of trained models by clicking on the links below.
 
-[BEVDet export onnx](https://github.com/LCH1238/BEVDet/tree/export)
+- BEVDet: [bevdet_one_lt_d.onnx](https://drive.google.com/file/d/1eMGJfdCVlDPBphBTjMcnIh3wdW7Q7WZB/view?usp=sharing)
+
+The model was trained in NuScenes database for 20 epochs.
+
+If you want to train model using the [TIER IV's internal database(~2600 key frames)](https://drive.google.com/file/d/1UaarK88HZu09sf7Ix-bEVl9zGNGFwTVL/view?usp=sharing), please refer to the following repositories:[BEVDet adapted to TIER IV dataset](https://github.com/cyn-liu/BEVDet/tree/train_export)
+
+## References/External links
+[1] https://github.com/HuangJunJie2017/BEVDet/tree/dev2.1
+
+[2] https://github.com/LCH1238/BEVDet/tree/export
+
+[3] https://github.com/LCH1238/bevdet-tensorrt-cpp/tree/one

From 3a840a52c4d8d04af4028585e51f28f1d6544bd7 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 1 Aug 2024 08:01:06 +0000
Subject: [PATCH 18/85] style(pre-commit): autofix

---
 perception/tensorrt_bevdet/README.md | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/perception/tensorrt_bevdet/README.md b/perception/tensorrt_bevdet/README.md
index 4306e9297c3d6..096bf676a4672 100644
--- a/perception/tensorrt_bevdet/README.md
+++ b/perception/tensorrt_bevdet/README.md
@@ -44,8 +44,9 @@ The model was trained in NuScenes database for 20 epochs.
 If you want to train model using the [TIER IV's internal database(~2600 key frames)](https://drive.google.com/file/d/1UaarK88HZu09sf7Ix-bEVl9zGNGFwTVL/view?usp=sharing), please refer to the following repositories:[BEVDet adapted to TIER IV dataset](https://github.com/cyn-liu/BEVDet/tree/train_export)
 
 ## References/External links
-[1] https://github.com/HuangJunJie2017/BEVDet/tree/dev2.1
 
-[2] https://github.com/LCH1238/BEVDet/tree/export
+[1] <https://github.com/HuangJunJie2017/BEVDet/tree/dev2.1>
 
-[3] https://github.com/LCH1238/bevdet-tensorrt-cpp/tree/one
+[2] <https://github.com/LCH1238/BEVDet/tree/export>
+
+[3] <https://github.com/LCH1238/bevdet-tensorrt-cpp/tree/one>

From d9e37a8f96c5d753c3117f01a4d4ea086b86c704 Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Fri, 2 Aug 2024 13:10:21 +0800
Subject: [PATCH 19/85] feat(tensorrt_bevdet): add new 3d object detection
 method

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 .../detection/detector/camera_bev_detector.launch.xml  |  4 ++--
 .../CMakeLists.txt                                     |  2 +-
 .../README.md                                          |  0
 .../config/bevdet.param.yaml                           |  0
 .../config/bevdet_r50_4dlongterm_depth.yaml            |  0
 .../config/cams_param.yaml                             |  0
 .../autoware/tensorrt_bevdet}/alignbev_plugin.hpp      |  6 +++---
 .../include/autoware/tensorrt_bevdet}/bevdet.hpp       |  6 +++---
 .../include/autoware/tensorrt_bevdet}/bevdet_node.hpp  |  6 +++---
 .../autoware/tensorrt_bevdet}/bevpool_plugin.hpp       |  6 +++---
 .../include/autoware/tensorrt_bevdet}/common.hpp       |  6 +++---
 .../autoware/tensorrt_bevdet}/cpu_jpegdecoder.hpp      |  6 +++---
 .../include/autoware/tensorrt_bevdet}/data.hpp         |  6 +++---
 .../autoware/tensorrt_bevdet}/gatherbev_plugin.hpp     |  6 +++---
 .../include/autoware/tensorrt_bevdet}/iou3d_nms.hpp    |  6 +++---
 .../autoware/tensorrt_bevdet}/nvjpegdecoder.hpp        |  6 +++---
 .../include/autoware/tensorrt_bevdet}/postprocess.hpp  |  6 +++---
 .../include/autoware/tensorrt_bevdet}/preprocess.hpp   |  6 +++---
 .../autoware/tensorrt_bevdet}/preprocess_plugin.hpp    |  6 +++---
 .../launch/tensorrt_bevdet.launch.xml                  | 10 +++++-----
 .../package.xml                                        |  2 +-
 .../src/alignbev_plugin.cu                             |  4 ++--
 .../src/bevdet.cpp                                     | 10 +++++-----
 .../src/bevdet_node.cpp                                |  2 +-
 .../src/bevpool_plugin.cu                              |  4 ++--
 .../src/cpu_jpegdecoder.cpp                            |  4 ++--
 .../src/data.cpp                                       |  4 ++--
 .../src/gatherbev_plugin.cu                            |  4 ++--
 .../src/iou3d_nms.cu                                   |  2 +-
 .../src/nvjpegdecoder.cpp                              |  2 +-
 .../src/postprocess.cu                                 |  2 +-
 .../src/preprocess.cu                                  |  2 +-
 .../src/preprocess_plugin.cu                           |  4 ++--
 33 files changed, 70 insertions(+), 70 deletions(-)
 rename perception/{tensorrt_bevdet => autoware_tensorrt_bevdet}/CMakeLists.txt (98%)
 rename perception/{tensorrt_bevdet => autoware_tensorrt_bevdet}/README.md (100%)
 rename perception/{tensorrt_bevdet => autoware_tensorrt_bevdet}/config/bevdet.param.yaml (100%)
 rename perception/{tensorrt_bevdet => autoware_tensorrt_bevdet}/config/bevdet_r50_4dlongterm_depth.yaml (100%)
 rename perception/{tensorrt_bevdet => autoware_tensorrt_bevdet}/config/cams_param.yaml (100%)
 rename perception/{tensorrt_bevdet/include => autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet}/alignbev_plugin.hpp (96%)
 rename perception/{tensorrt_bevdet/include => autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet}/bevdet.hpp (97%)
 rename perception/{tensorrt_bevdet/include => autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet}/bevdet_node.hpp (96%)
 rename perception/{tensorrt_bevdet/include => autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet}/bevpool_plugin.hpp (96%)
 rename perception/{tensorrt_bevdet/include => autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet}/common.hpp (96%)
 rename perception/{tensorrt_bevdet/include => autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet}/cpu_jpegdecoder.hpp (81%)
 rename perception/{tensorrt_bevdet/include => autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet}/data.hpp (92%)
 rename perception/{tensorrt_bevdet/include => autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet}/gatherbev_plugin.hpp (95%)
 rename perception/{tensorrt_bevdet/include => autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet}/iou3d_nms.hpp (92%)
 rename perception/{tensorrt_bevdet/include => autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet}/nvjpegdecoder.hpp (94%)
 rename perception/{tensorrt_bevdet/include => autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet}/postprocess.hpp (92%)
 rename perception/{tensorrt_bevdet/include => autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet}/preprocess.hpp (82%)
 rename perception/{tensorrt_bevdet/include => autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet}/preprocess_plugin.hpp (95%)
 rename perception/{tensorrt_bevdet => autoware_tensorrt_bevdet}/launch/tensorrt_bevdet.launch.xml (75%)
 rename perception/{tensorrt_bevdet => autoware_tensorrt_bevdet}/package.xml (96%)
 rename perception/{tensorrt_bevdet => autoware_tensorrt_bevdet}/src/alignbev_plugin.cu (98%)
 rename perception/{tensorrt_bevdet => autoware_tensorrt_bevdet}/src/bevdet.cpp (99%)
 rename perception/{tensorrt_bevdet => autoware_tensorrt_bevdet}/src/bevdet_node.cpp (99%)
 rename perception/{tensorrt_bevdet => autoware_tensorrt_bevdet}/src/bevpool_plugin.cu (99%)
 rename perception/{tensorrt_bevdet => autoware_tensorrt_bevdet}/src/cpu_jpegdecoder.cpp (96%)
 rename perception/{tensorrt_bevdet => autoware_tensorrt_bevdet}/src/data.cpp (97%)
 rename perception/{tensorrt_bevdet => autoware_tensorrt_bevdet}/src/gatherbev_plugin.cu (98%)
 rename perception/{tensorrt_bevdet => autoware_tensorrt_bevdet}/src/iou3d_nms.cu (99%)
 rename perception/{tensorrt_bevdet => autoware_tensorrt_bevdet}/src/nvjpegdecoder.cpp (99%)
 rename perception/{tensorrt_bevdet => autoware_tensorrt_bevdet}/src/postprocess.cu (99%)
 rename perception/{tensorrt_bevdet => autoware_tensorrt_bevdet}/src/preprocess.cu (96%)
 rename perception/{tensorrt_bevdet => autoware_tensorrt_bevdet}/src/preprocess_plugin.cu (98%)

diff --git a/launch/tier4_perception_launch/launch/object_recognition/detection/detector/camera_bev_detector.launch.xml b/launch/tier4_perception_launch/launch/object_recognition/detection/detector/camera_bev_detector.launch.xml
index 069ef73a127e5..8ad5f3b965805 100644
--- a/launch/tier4_perception_launch/launch/object_recognition/detection/detector/camera_bev_detector.launch.xml
+++ b/launch/tier4_perception_launch/launch/object_recognition/detection/detector/camera_bev_detector.launch.xml
@@ -24,14 +24,14 @@
 
   <!-- Model parameters -->
   <arg name="bev_detection_model" default="bevdet" description="options: `bevdet`"/>
-  <arg name="data_path" default="$(env HOME)/autoware_data/tensorrt_bevdet" description="packages data and artifacts directory path"/>
+  <arg name="data_path" default="$(env HOME)/autoware_data/autoware_tensorrt_bevdet" description="packages data and artifacts directory path"/>
   <arg name="onnx_file" default="$(var data_path)/bevdet_one_lt_d.onnx" description="onnx model file path"/>
   <arg name="engine_file" default="$(var data_path)/bevdet_one_lt_d.engine" description="engine model file path"/>
   <!-- BEVDet -->
   <group if="$(eval &quot;'$(var bev_detection_model)'=='bevdet'&quot;)">
     <push-ros-namespace namespace="bevdet"/>
     <group>
-      <include file="$(find-pkg-share tensorrt_bevdet)/launch/tensorrt_bevdet.launch.xml">
+      <include file="$(find-pkg-share autoware_tensorrt_bevdet)/launch/tensorrt_bevdet.launch.xml">
         <arg name="input/pointcloud" value="$(var input/pointcloud)"/>
         <arg name="input/img_fl" value="$(var input/camera0/image)"/>
         <arg name="input/img_f" value="$(var input/camera1/image)"/>
diff --git a/perception/tensorrt_bevdet/CMakeLists.txt b/perception/autoware_tensorrt_bevdet/CMakeLists.txt
similarity index 98%
rename from perception/tensorrt_bevdet/CMakeLists.txt
rename to perception/autoware_tensorrt_bevdet/CMakeLists.txt
index 1c1fec3785dad..a383274e5a617 100644
--- a/perception/tensorrt_bevdet/CMakeLists.txt
+++ b/perception/autoware_tensorrt_bevdet/CMakeLists.txt
@@ -1,5 +1,5 @@
 cmake_minimum_required(VERSION 3.17)
-project(tensorrt_bevdet)
+project(autoware_tensorrt_bevdet)
 
 add_compile_options(-W)
 add_compile_options(-std=c++17)
diff --git a/perception/tensorrt_bevdet/README.md b/perception/autoware_tensorrt_bevdet/README.md
similarity index 100%
rename from perception/tensorrt_bevdet/README.md
rename to perception/autoware_tensorrt_bevdet/README.md
diff --git a/perception/tensorrt_bevdet/config/bevdet.param.yaml b/perception/autoware_tensorrt_bevdet/config/bevdet.param.yaml
similarity index 100%
rename from perception/tensorrt_bevdet/config/bevdet.param.yaml
rename to perception/autoware_tensorrt_bevdet/config/bevdet.param.yaml
diff --git a/perception/tensorrt_bevdet/config/bevdet_r50_4dlongterm_depth.yaml b/perception/autoware_tensorrt_bevdet/config/bevdet_r50_4dlongterm_depth.yaml
similarity index 100%
rename from perception/tensorrt_bevdet/config/bevdet_r50_4dlongterm_depth.yaml
rename to perception/autoware_tensorrt_bevdet/config/bevdet_r50_4dlongterm_depth.yaml
diff --git a/perception/tensorrt_bevdet/config/cams_param.yaml b/perception/autoware_tensorrt_bevdet/config/cams_param.yaml
similarity index 100%
rename from perception/tensorrt_bevdet/config/cams_param.yaml
rename to perception/autoware_tensorrt_bevdet/config/cams_param.yaml
diff --git a/perception/tensorrt_bevdet/include/alignbev_plugin.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/alignbev_plugin.hpp
similarity index 96%
rename from perception/tensorrt_bevdet/include/alignbev_plugin.hpp
rename to perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/alignbev_plugin.hpp
index be249700b6516..beeeb8ed656d8 100644
--- a/perception/tensorrt_bevdet/include/alignbev_plugin.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/alignbev_plugin.hpp
@@ -13,8 +13,8 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef ALIGNBEV_PLUGIN_HPP_
-#define ALIGNBEV_PLUGIN_HPP_
+#ifndef AUTOWARE__TENSORRT_BEVDET__ALIGNBEV_PLUGIN_HPP_
+#define AUTOWARE__TENSORRT_BEVDET__ALIGNBEV_PLUGIN_HPP_
 
 #include <NvInfer.h>
 #include <unistd.h>
@@ -116,4 +116,4 @@ class AlignBEVPluginCreator : public IPluginCreator
 };
 
 }  // namespace nvinfer1
-#endif  // ALIGNBEV_PLUGIN_HPP_
+#endif  // AUTOWARE__TENSORRT_BEVDET__ALIGNBEV_PLUGIN_HPP_
diff --git a/perception/tensorrt_bevdet/include/bevdet.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp
similarity index 97%
rename from perception/tensorrt_bevdet/include/bevdet.hpp
rename to perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp
index 3e1210f9b86e6..bd0ae58b3a4b7 100644
--- a/perception/tensorrt_bevdet/include/bevdet.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp
@@ -11,8 +11,8 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#ifndef BEVDET_HPP_
-#define BEVDET_HPP_
+#ifndef AUTOWARE__TENSORRT_BEVDET__BEVDET_HPP_
+#define AUTOWARE__TENSORRT_BEVDET__BEVDET_HPP_
 
 #include "NvInfer.h"
 #include "common.hpp"
@@ -236,4 +236,4 @@ class BEVDet
   std::unique_ptr<adjFrame> adj_frame_ptr;
 };
 
-#endif  // BEVDET_HPP_
+#endif  // AUTOWARE__TENSORRT_BEVDET__BEVDET_HPP_
diff --git a/perception/tensorrt_bevdet/include/bevdet_node.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
similarity index 96%
rename from perception/tensorrt_bevdet/include/bevdet_node.hpp
rename to perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
index 5f7b945819918..2e4fe5604c4dd 100644
--- a/perception/tensorrt_bevdet/include/bevdet_node.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
@@ -11,8 +11,8 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#ifndef BEVDET_NODE_HPP_
-#define BEVDET_NODE_HPP_
+#ifndef AUTOWARE__TENSORRT_BEVDET__BEVDET_NODE_HPP_
+#define AUTOWARE__TENSORRT_BEVDET__BEVDET_NODE_HPP_
 
 #include "bevdet.hpp"
 #include "cpu_jpegdecoder.hpp"
@@ -144,4 +144,4 @@ class TRTBEVDetNode : public rclcpp::Node
     const sensor_msgs::msg::Image::ConstSharedPtr & msg_br_img);
 };
 
-#endif  // BEVDET_NODE_HPP_
+#endif  // AUTOWARE__TENSORRT_BEVDET__BEVDET_NODE_HPP_
diff --git a/perception/tensorrt_bevdet/include/bevpool_plugin.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevpool_plugin.hpp
similarity index 96%
rename from perception/tensorrt_bevdet/include/bevpool_plugin.hpp
rename to perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevpool_plugin.hpp
index a79a58b000f71..0a1f6e114589a 100644
--- a/perception/tensorrt_bevdet/include/bevpool_plugin.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevpool_plugin.hpp
@@ -13,8 +13,8 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef BEVPOOL_PLUGIN_HPP_
-#define BEVPOOL_PLUGIN_HPP_
+#ifndef AUTOWARE__TENSORRT_BEVDET__BEVPOOL_PLUGIN_HPP_
+#define AUTOWARE__TENSORRT_BEVDET__BEVPOOL_PLUGIN_HPP_
 
 #include <NvInfer.h>
 #include <unistd.h>
@@ -120,4 +120,4 @@ class BEVPoolPluginCreator : public IPluginCreator
 };
 
 }  // namespace nvinfer1
-#endif  // BEVPOOL_PLUGIN_HPP_
+#endif  // AUTOWARE__TENSORRT_BEVDET__BEVPOOL_PLUGIN_HPP_
diff --git a/perception/tensorrt_bevdet/include/common.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/common.hpp
similarity index 96%
rename from perception/tensorrt_bevdet/include/common.hpp
rename to perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/common.hpp
index 28a1698a579e2..8d3ad49521a7c 100644
--- a/perception/tensorrt_bevdet/include/common.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/common.hpp
@@ -11,8 +11,8 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#ifndef COMMON_HPP_
-#define COMMON_HPP_
+#ifndef AUTOWARE__TENSORRT_BEVDET__COMMON_HPP_
+#define AUTOWARE__TENSORRT_BEVDET__COMMON_HPP_
 
 #include <NvInfer.h>
 #include <assert.h>
@@ -142,4 +142,4 @@ class Logger : public nvinfer1::ILogger
   Severity reportable_severity;
 };
 
-#endif  // COMMON_HPP_
+#endif  // AUTOWARE__TENSORRT_BEVDET__COMMON_HPP_
diff --git a/perception/tensorrt_bevdet/include/cpu_jpegdecoder.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/cpu_jpegdecoder.hpp
similarity index 81%
rename from perception/tensorrt_bevdet/include/cpu_jpegdecoder.hpp
rename to perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/cpu_jpegdecoder.hpp
index 3544212bf2c68..cfc5248856e5b 100644
--- a/perception/tensorrt_bevdet/include/cpu_jpegdecoder.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/cpu_jpegdecoder.hpp
@@ -11,8 +11,8 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#ifndef CPU_JPEGDECODER_HPP_
-#define CPU_JPEGDECODER_HPP_
+#ifndef AUTOWARE__TENSORRT_BEVDET__CPU_JPEGDECODER_HPP_
+#define AUTOWARE__TENSORRT_BEVDET__CPU_JPEGDECODER_HPP_
 
 #include "common.hpp"
 
@@ -21,4 +21,4 @@
 int decode_cpu(
   const std::vector<std::vector<char>> & files_data, uchar * out_imgs, size_t width, size_t height);
 
-#endif  // CPU_JPEGDECODER_HPP_
+#endif  // AUTOWARE__TENSORRT_BEVDET__CPU_JPEGDECODER_HPP_
diff --git a/perception/tensorrt_bevdet/include/data.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/data.hpp
similarity index 92%
rename from perception/tensorrt_bevdet/include/data.hpp
rename to perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/data.hpp
index cc448245f969e..637c1549546d1 100644
--- a/perception/tensorrt_bevdet/include/data.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/data.hpp
@@ -11,8 +11,8 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#ifndef DATA_HPP_
-#define DATA_HPP_
+#ifndef AUTOWARE__TENSORRT_BEVDET__DATA_HPP_
+#define AUTOWARE__TENSORRT_BEVDET__DATA_HPP_
 
 #include "common.hpp"
 #include "nvjpegdecoder.hpp"
@@ -64,4 +64,4 @@ int read_image(std::string & image_names, std::vector<char> & raw_data);
 
 int read_sample(std::vector<std::string> & imgs_file, std::vector<std::vector<char>> & imgs_data);
 
-#endif  // DATA_HPP_
+#endif  // AUTOWARE__TENSORRT_BEVDET__DATA_HPP_
diff --git a/perception/tensorrt_bevdet/include/gatherbev_plugin.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/gatherbev_plugin.hpp
similarity index 95%
rename from perception/tensorrt_bevdet/include/gatherbev_plugin.hpp
rename to perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/gatherbev_plugin.hpp
index af9ac003aa2ef..3d91763ed88a1 100644
--- a/perception/tensorrt_bevdet/include/gatherbev_plugin.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/gatherbev_plugin.hpp
@@ -13,8 +13,8 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef GATHERBEV_PLUGIN_HPP_
-#define GATHERBEV_PLUGIN_HPP_
+#ifndef AUTOWARE__TENSORRT_BEVDET__GATHERBEV_PLUGIN_HPP_
+#define AUTOWARE__TENSORRT_BEVDET__GATHERBEV_PLUGIN_HPP_
 
 #include <NvInfer.h>
 #include <unistd.h>
@@ -117,4 +117,4 @@ class GatherBEVPluginCreator : public IPluginCreator
 };
 
 }  // namespace nvinfer1
-#endif  // GATHERBEV_PLUGIN_HPP_
+#endif  // AUTOWARE__TENSORRT_BEVDET__GATHERBEV_PLUGIN_HPP_
diff --git a/perception/tensorrt_bevdet/include/iou3d_nms.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/iou3d_nms.hpp
similarity index 92%
rename from perception/tensorrt_bevdet/include/iou3d_nms.hpp
rename to perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/iou3d_nms.hpp
index 1a2cb37b4a5a8..5a95be4465317 100644
--- a/perception/tensorrt_bevdet/include/iou3d_nms.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/iou3d_nms.hpp
@@ -17,8 +17,8 @@ Written by Shaoshuai Shi
 All Rights Reserved 2019-2022.
 */
 #pragma once
-#ifndef IOU3D_NMS_HPP_
-#define IOU3D_NMS_HPP_
+#ifndef AUTOWARE__TENSORRT_BEVDET__IOU3D_NMS_HPP_
+#define AUTOWARE__TENSORRT_BEVDET__IOU3D_NMS_HPP_
 
 #include "common.hpp"
 
@@ -71,4 +71,4 @@ struct Box
   int label;
   bool is_drop;  // for nms
 };
-#endif  // IOU3D_NMS_HPP_
+#endif  // AUTOWARE__TENSORRT_BEVDET__IOU3D_NMS_HPP_
diff --git a/perception/tensorrt_bevdet/include/nvjpegdecoder.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/nvjpegdecoder.hpp
similarity index 94%
rename from perception/tensorrt_bevdet/include/nvjpegdecoder.hpp
rename to perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/nvjpegdecoder.hpp
index e5eca93bd5843..a9cc280a2b5a0 100644
--- a/perception/tensorrt_bevdet/include/nvjpegdecoder.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/nvjpegdecoder.hpp
@@ -11,8 +11,8 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#ifndef NVJPEGDECODER_HPP_
-#define NVJPEGDECODER_HPP_
+#ifndef AUTOWARE__TENSORRT_BEVDET__NVJPEGDECODER_HPP_
+#define AUTOWARE__TENSORRT_BEVDET__NVJPEGDECODER_HPP_
 
 #ifdef __HAVE_NVJPEG__
 #include "common.hpp"
@@ -113,4 +113,4 @@ class nvjpegDecoder
 
 #endif
 
-#endif  // NVJPEGDECODER_HPP_
+#endif  // AUTOWARE__TENSORRT_BEVDET__NVJPEGDECODER_HPP_
diff --git a/perception/tensorrt_bevdet/include/postprocess.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/postprocess.hpp
similarity index 92%
rename from perception/tensorrt_bevdet/include/postprocess.hpp
rename to perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/postprocess.hpp
index cb401239f5746..c7d0293f34888 100644
--- a/perception/tensorrt_bevdet/include/postprocess.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/postprocess.hpp
@@ -11,8 +11,8 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#ifndef POSTPROCESS_HPP_
-#define POSTPROCESS_HPP_
+#ifndef AUTOWARE__TENSORRT_BEVDET__POSTPROCESS_HPP_
+#define AUTOWARE__TENSORRT_BEVDET__POSTPROCESS_HPP_
 
 #include "common.hpp"
 #include "iou3d_nms.hpp"
@@ -69,4 +69,4 @@ class PostprocessGPU
   float * nms_rescale_factor_dev = nullptr;
 };
 
-#endif  // POSTPROCESS_HPP_
+#endif  // AUTOWARE__TENSORRT_BEVDET__POSTPROCESS_HPP_
diff --git a/perception/tensorrt_bevdet/include/preprocess.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/preprocess.hpp
similarity index 82%
rename from perception/tensorrt_bevdet/include/preprocess.hpp
rename to perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/preprocess.hpp
index 3abe82844655c..e5aaedb972e5f 100644
--- a/perception/tensorrt_bevdet/include/preprocess.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/preprocess.hpp
@@ -11,11 +11,11 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#ifndef PREPROCESS_HPP_
-#define PREPROCESS_HPP_
+#ifndef AUTOWARE__TENSORRT_BEVDET__PREPROCESS_HPP_
+#define AUTOWARE__TENSORRT_BEVDET__PREPROCESS_HPP_
 
 #include "common.hpp"
 
 void convert_RGBHWC_to_BGRCHW(uchar * input, uchar * output, int channels, int height, int width);
 
-#endif  // PREPROCESS_HPP_
+#endif  // AUTOWARE__TENSORRT_BEVDET__PREPROCESS_HPP_
diff --git a/perception/tensorrt_bevdet/include/preprocess_plugin.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/preprocess_plugin.hpp
similarity index 95%
rename from perception/tensorrt_bevdet/include/preprocess_plugin.hpp
rename to perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/preprocess_plugin.hpp
index 2c40864b61259..ed6bbf861ab22 100644
--- a/perception/tensorrt_bevdet/include/preprocess_plugin.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/preprocess_plugin.hpp
@@ -13,8 +13,8 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef PREPROCESS_PLUGIN_HPP_
-#define PREPROCESS_PLUGIN_HPP_
+#ifndef AUTOWARE__TENSORRT_BEVDET__PREPROCESS_PLUGIN_HPP_
+#define AUTOWARE__TENSORRT_BEVDET__PREPROCESS_PLUGIN_HPP_
 
 #include <NvInfer.h>
 #include <unistd.h>
@@ -119,4 +119,4 @@ class PreprocessPluginCreator : public IPluginCreator
 };
 
 }  // namespace nvinfer1
-#endif  // PREPROCESS_PLUGIN_HPP_
+#endif  // AUTOWARE__TENSORRT_BEVDET__PREPROCESS_PLUGIN_HPP_
diff --git a/perception/tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml b/perception/autoware_tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml
similarity index 75%
rename from perception/tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml
rename to perception/autoware_tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml
index 0e55056962039..e0b69cfd3a3be 100644
--- a/perception/tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml
+++ b/perception/autoware_tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml
@@ -11,15 +11,15 @@
   <arg name="output/pointcloud" default="points_painting"/>
   <arg name="output/boxes" default="boxes"/>
 
-  <arg name="data_path" default="$(env HOME)/autoware_data/tensorrt_bevdet"/>
+  <arg name="data_path" default="$(env HOME)/autoware_data/autoware_tensorrt_bevdet"/>
   <arg name="onnx_file" default="$(var data_path)/bevdet_one_lt_d.onnx"/>
   <arg name="engine_file" default="$(var data_path)/bevdet_one_lt_d.engine"/>
-  <arg name="cam_config" default="$(find-pkg-share tensorrt_bevdet)/config/cams_param.yaml"/>
-  <arg name="model_config" default="$(find-pkg-share tensorrt_bevdet)/config/bevdet_r50_4dlongterm_depth.yaml"/>
-  <arg name="param_file" default="$(find-pkg-share tensorrt_bevdet)/config/bevdet.param.yaml"/>
+  <arg name="cam_config" default="$(find-pkg-share autoware_tensorrt_bevdet)/config/cams_param.yaml"/>
+  <arg name="model_config" default="$(find-pkg-share autoware_tensorrt_bevdet)/config/bevdet_r50_4dlongterm_depth.yaml"/>
+  <arg name="param_file" default="$(find-pkg-share autoware_tensorrt_bevdet)/config/bevdet.param.yaml"/>
 
   <!-- bevdet node -->
-  <node pkg="tensorrt_bevdet" exec="tensorrt_bevdet_node" name="tensorrt_bevdet_node" output="screen">
+  <node pkg="autoware_tensorrt_bevdet" exec="autoware_tensorrt_bevdet_node" name="tensorrt_bevdet_node" output="screen">
     <!-- ros topic name -->
     <remap from="~/input/topic_cloud" to="$(var input/pointcloud)"/>
     <remap from="~/input/topic_img_fl" to="$(var input/img_fl)"/>
diff --git a/perception/tensorrt_bevdet/package.xml b/perception/autoware_tensorrt_bevdet/package.xml
similarity index 96%
rename from perception/tensorrt_bevdet/package.xml
rename to perception/autoware_tensorrt_bevdet/package.xml
index ba75862e6ff2f..a3ad3bfc30dd6 100644
--- a/perception/tensorrt_bevdet/package.xml
+++ b/perception/autoware_tensorrt_bevdet/package.xml
@@ -1,6 +1,6 @@
 <?xml version="1.0"?>
 <package format="3">
-  <name>tensorrt_bevdet</name>
+  <name>autoware_tensorrt_bevdet</name>
   <version>0.0.1</version>
   <description>tensorrt library implementation for bevdet</description>
 
diff --git a/perception/tensorrt_bevdet/src/alignbev_plugin.cu b/perception/autoware_tensorrt_bevdet/src/alignbev_plugin.cu
similarity index 98%
rename from perception/tensorrt_bevdet/src/alignbev_plugin.cu
rename to perception/autoware_tensorrt_bevdet/src/alignbev_plugin.cu
index 444de602af910..8127931d2a3b5 100644
--- a/perception/tensorrt_bevdet/src/alignbev_plugin.cu
+++ b/perception/autoware_tensorrt_bevdet/src/alignbev_plugin.cu
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#include "alignbev_plugin.hpp"
-#include "common.hpp"
+#include "autoware/tensorrt_bevdet/alignbev_plugin.hpp"
+#include "autoware/tensorrt_bevdet/common.hpp"
 
 #include <cuda_fp16.h>
 #include <cuda_runtime_api.h>
diff --git a/perception/tensorrt_bevdet/src/bevdet.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
similarity index 99%
rename from perception/tensorrt_bevdet/src/bevdet.cpp
rename to perception/autoware_tensorrt_bevdet/src/bevdet.cpp
index 2cf249b12fdfe..9ae50dcede4aa 100644
--- a/perception/tensorrt_bevdet/src/bevdet.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
@@ -11,12 +11,12 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#include "bevdet.hpp"
+#include "autoware/tensorrt_bevdet/bevdet.hpp"
 
-#include "alignbev_plugin.hpp"
-#include "bevpool_plugin.hpp"
-#include "gatherbev_plugin.hpp"
-#include "preprocess_plugin.hpp"
+#include "autoware/tensorrt_bevdet/alignbev_plugin.hpp"
+#include "autoware/tensorrt_bevdet/bevpool_plugin.hpp"
+#include "autoware/tensorrt_bevdet/gatherbev_plugin.hpp"
+#include "autoware/tensorrt_bevdet/preprocess_plugin.hpp"
 
 #include <thrust/functional.h>
 #include <thrust/sort.h>
diff --git a/perception/tensorrt_bevdet/src/bevdet_node.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
similarity index 99%
rename from perception/tensorrt_bevdet/src/bevdet_node.cpp
rename to perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
index bc293a4081a0a..eec70df016dd0 100644
--- a/perception/tensorrt_bevdet/src/bevdet_node.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
@@ -11,7 +11,7 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#include "bevdet_node.hpp"
+#include "autoware/tensorrt_bevdet/bevdet_node.hpp"
 
 using Label = autoware_perception_msgs::msg::ObjectClassification;
 std::map<int, std::vector<int>> colormap{
diff --git a/perception/tensorrt_bevdet/src/bevpool_plugin.cu b/perception/autoware_tensorrt_bevdet/src/bevpool_plugin.cu
similarity index 99%
rename from perception/tensorrt_bevdet/src/bevpool_plugin.cu
rename to perception/autoware_tensorrt_bevdet/src/bevpool_plugin.cu
index 9b98cba128c29..d9d1b5f94211c 100644
--- a/perception/tensorrt_bevdet/src/bevpool_plugin.cu
+++ b/perception/autoware_tensorrt_bevdet/src/bevpool_plugin.cu
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#include "bevpool_plugin.hpp"
-#include "common.hpp"
+#include "autoware/tensorrt_bevdet/bevpool_plugin.hpp"
+#include "autoware/tensorrt_bevdet/common.hpp"
 
 #include <cuda_fp16.h>
 #include <cuda_runtime_api.h>
diff --git a/perception/tensorrt_bevdet/src/cpu_jpegdecoder.cpp b/perception/autoware_tensorrt_bevdet/src/cpu_jpegdecoder.cpp
similarity index 96%
rename from perception/tensorrt_bevdet/src/cpu_jpegdecoder.cpp
rename to perception/autoware_tensorrt_bevdet/src/cpu_jpegdecoder.cpp
index c7ca8b063fd21..4a24c02b4ca5e 100644
--- a/perception/tensorrt_bevdet/src/cpu_jpegdecoder.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/cpu_jpegdecoder.cpp
@@ -11,9 +11,9 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#include "cpu_jpegdecoder.hpp"
+#include "autoware/tensorrt_bevdet/cpu_jpegdecoder.hpp"
 
-#include "preprocess.hpp"
+#include "autoware/tensorrt_bevdet/preprocess.hpp"
 
 #include <cuda_runtime.h>
 #include <jpeglib.h>
diff --git a/perception/tensorrt_bevdet/src/data.cpp b/perception/autoware_tensorrt_bevdet/src/data.cpp
similarity index 97%
rename from perception/tensorrt_bevdet/src/data.cpp
rename to perception/autoware_tensorrt_bevdet/src/data.cpp
index 810ab7cc0e8b1..65203d46eb29b 100644
--- a/perception/tensorrt_bevdet/src/data.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/data.cpp
@@ -11,9 +11,9 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#include "data.hpp"
+#include "autoware/tensorrt_bevdet/data.hpp"
 
-#include "cpu_jpegdecoder.hpp"
+#include "autoware/tensorrt_bevdet/cpu_jpegdecoder.hpp"
 
 #include <stdlib.h>
 
diff --git a/perception/tensorrt_bevdet/src/gatherbev_plugin.cu b/perception/autoware_tensorrt_bevdet/src/gatherbev_plugin.cu
similarity index 98%
rename from perception/tensorrt_bevdet/src/gatherbev_plugin.cu
rename to perception/autoware_tensorrt_bevdet/src/gatherbev_plugin.cu
index 6ac903625d093..64a5244696b10 100644
--- a/perception/tensorrt_bevdet/src/gatherbev_plugin.cu
+++ b/perception/autoware_tensorrt_bevdet/src/gatherbev_plugin.cu
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#include "common.hpp"
-#include "gatherbev_plugin.hpp"
+#include "autoware/tensorrt_bevdet/common.hpp"
+#include "autoware/tensorrt_bevdet/gatherbev_plugin.hpp"
 
 #include <cuda_fp16.h>
 #include <cuda_runtime_api.h>
diff --git a/perception/tensorrt_bevdet/src/iou3d_nms.cu b/perception/autoware_tensorrt_bevdet/src/iou3d_nms.cu
similarity index 99%
rename from perception/tensorrt_bevdet/src/iou3d_nms.cu
rename to perception/autoware_tensorrt_bevdet/src/iou3d_nms.cu
index 1fbf2503be528..d4302094466b7 100644
--- a/perception/tensorrt_bevdet/src/iou3d_nms.cu
+++ b/perception/autoware_tensorrt_bevdet/src/iou3d_nms.cu
@@ -17,7 +17,7 @@ Written by Shaoshuai Shi
 All Rights Reserved 2019-2022.
 */
 
-#include "iou3d_nms.hpp"
+#include "autoware/tensorrt_bevdet/iou3d_nms.hpp"
 
 struct Point
 {
diff --git a/perception/tensorrt_bevdet/src/nvjpegdecoder.cpp b/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp
similarity index 99%
rename from perception/tensorrt_bevdet/src/nvjpegdecoder.cpp
rename to perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp
index a85256ba0c15a..035e70d6d6102 100644
--- a/perception/tensorrt_bevdet/src/nvjpegdecoder.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp
@@ -13,7 +13,7 @@
 // limitations under the License.
 #ifdef __HAVE_NVJPEG__
 
-#include "nvjpegdecoder.hpp"
+#include "autoware/tensorrt_bevdet/nvjpegdecoder.hpp"
 
 #include <chrono>
 
diff --git a/perception/tensorrt_bevdet/src/postprocess.cu b/perception/autoware_tensorrt_bevdet/src/postprocess.cu
similarity index 99%
rename from perception/tensorrt_bevdet/src/postprocess.cu
rename to perception/autoware_tensorrt_bevdet/src/postprocess.cu
index b8ba6b3cadd1a..c102425f44a9e 100644
--- a/perception/tensorrt_bevdet/src/postprocess.cu
+++ b/perception/autoware_tensorrt_bevdet/src/postprocess.cu
@@ -11,7 +11,7 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#include "postprocess.hpp"
+#include "autoware/tensorrt_bevdet/postprocess.hpp"
 
 #include <thrust/device_vector.h>
 #include <thrust/host_vector.h>
diff --git a/perception/tensorrt_bevdet/src/preprocess.cu b/perception/autoware_tensorrt_bevdet/src/preprocess.cu
similarity index 96%
rename from perception/tensorrt_bevdet/src/preprocess.cu
rename to perception/autoware_tensorrt_bevdet/src/preprocess.cu
index 2c454403d38de..3b48eb9a809e7 100644
--- a/perception/tensorrt_bevdet/src/preprocess.cu
+++ b/perception/autoware_tensorrt_bevdet/src/preprocess.cu
@@ -11,7 +11,7 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#include "preprocess.hpp"
+#include "autoware/tensorrt_bevdet/preprocess.hpp"
 
 #include <thrust/fill.h>
 
diff --git a/perception/tensorrt_bevdet/src/preprocess_plugin.cu b/perception/autoware_tensorrt_bevdet/src/preprocess_plugin.cu
similarity index 98%
rename from perception/tensorrt_bevdet/src/preprocess_plugin.cu
rename to perception/autoware_tensorrt_bevdet/src/preprocess_plugin.cu
index 0eea36251c782..10dab65a9bef1 100644
--- a/perception/tensorrt_bevdet/src/preprocess_plugin.cu
+++ b/perception/autoware_tensorrt_bevdet/src/preprocess_plugin.cu
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#include "common.hpp"
-#include "preprocess_plugin.hpp"
+#include "autoware/tensorrt_bevdet/common.hpp"
+#include "autoware/tensorrt_bevdet/preprocess_plugin.hpp"
 
 #include <cuda_fp16.h>
 #include <cuda_runtime_api.h>

From afceea0c8bd7a53ec061d1657c97d824d216fca0 Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Fri, 9 Aug 2024 13:31:27 +0800
Subject: [PATCH 20/85] feat(tensorrt_bevdet): add new 3d object detection
 method

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 perception/autoware_tensorrt_bevdet/src/bevdet.cpp       | 9 ---------
 .../autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp       | 2 +-
 2 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
index 9ae50dcede4aa..c956534867798 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
@@ -435,15 +435,6 @@ int BEVDet::InitEngine(const std::string & engine_file)
 
   return EXIT_SUCCESS;
 }
-void save_tensor(size_t size, const void * ptr, const std::string & file)
-{
-  float * tensor = new float[size];
-  CHECK_CUDA(cudaMemcpy(tensor, ptr, size * sizeof(float), cudaMemcpyDeviceToHost));
-  std::ofstream out(file, std::ios::out | std::ios::binary);
-  out.write(reinterpret_cast<char *>(tensor), size * sizeof(float));
-  out.close();
-  delete[] tensor;
-}
 
 int BEVDet::DeserializeTRTEngine(
   const std::string & engine_file, nvinfer1::ICudaEngine ** engine_ptr)
diff --git a/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp b/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp
index 035e70d6d6102..8e71540e4879e 100644
--- a/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp
@@ -175,7 +175,7 @@ void decode_single_image(
   CHECK_NVJPEG(nvjpegDecodeParamsSetOutputFormat(params.nvjpeg_decode_params, share_param.fmt));
 
   CHECK_NVJPEG(nvjpegJpegStreamParse(
-    share_param.nvjpeg_handle, (const uchar *)img_data.data(), img_data.size(), 0, 0,
+    share_param.nvjpeg_handle, reinterpret_cast<const uchar *>(img_data.data()), img_data.size(), 0, 0,
     params.jpeg_streams[buffer_index]));
 
   CHECK_NVJPEG(nvjpegStateAttachPinnedBuffer(

From 6d61812db32c26eea33c323218e768cd64be43a4 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 9 Aug 2024 05:35:47 +0000
Subject: [PATCH 21/85] style(pre-commit): autofix

---
 perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp b/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp
index 8e71540e4879e..9d9fe9b68f051 100644
--- a/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp
@@ -175,8 +175,8 @@ void decode_single_image(
   CHECK_NVJPEG(nvjpegDecodeParamsSetOutputFormat(params.nvjpeg_decode_params, share_param.fmt));
 
   CHECK_NVJPEG(nvjpegJpegStreamParse(
-    share_param.nvjpeg_handle, reinterpret_cast<const uchar *>(img_data.data()), img_data.size(), 0, 0,
-    params.jpeg_streams[buffer_index]));
+    share_param.nvjpeg_handle, reinterpret_cast<const uchar *>(img_data.data()), img_data.size(), 0,
+    0, params.jpeg_streams[buffer_index]));
 
   CHECK_NVJPEG(nvjpegStateAttachPinnedBuffer(
     params.nvjpeg_decoupled_state, params.pinned_buffers[buffer_index]));

From 6e370e3fed0d824328d81522a8abc0f26eff8e24 Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Fri, 9 Aug 2024 16:22:54 +0800
Subject: [PATCH 22/85] feat(autoware_tensorrt_bevdet): add new 3d object
 detection method

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 .../include/autoware/tensorrt_bevdet/data.hpp                   | 2 +-
 perception/autoware_tensorrt_bevdet/src/data.cpp                | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/data.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/data.hpp
index 637c1549546d1..7770110dfa056 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/data.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/data.hpp
@@ -60,7 +60,7 @@ Eigen::Translation3f fromYamlTrans(YAML::Node x);
 Eigen::Quaternion<float> fromYamlQuater(YAML::Node x);
 Eigen::Matrix3f fromYamlMatrix3f(YAML::Node x);
 
-int read_image(std::string & image_names, std::vector<char> & raw_data);
+int read_image(const std::string & image_names, std::vector<char> & raw_data);
 
 int read_sample(std::vector<std::string> & imgs_file, std::vector<std::vector<char>> & imgs_data);
 
diff --git a/perception/autoware_tensorrt_bevdet/src/data.cpp b/perception/autoware_tensorrt_bevdet/src/data.cpp
index 65203d46eb29b..d3b88c7520149 100644
--- a/perception/autoware_tensorrt_bevdet/src/data.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/data.cpp
@@ -58,7 +58,7 @@ camParams::camParams(const YAML::Node & config, int n, std::vector<std::string>
   }
 }
 
-int read_image(std::string & image_names, std::vector<char> & raw_data)
+int read_image(const std::string & image_names, std::vector<char> & raw_data)
 {
   std::ifstream input(image_names.c_str(), std::ios::in | std::ios::binary | std::ios::ate);
 

From 43d311868703b583cf161e9daca994b80692a129 Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Fri, 9 Aug 2024 16:32:34 +0800
Subject: [PATCH 23/85] feat(autoware_tensorrt_bevdet): add new 3d object
 detection method

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 .../include/autoware/tensorrt_bevdet/data.hpp                   | 2 +-
 perception/autoware_tensorrt_bevdet/src/data.cpp                | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/data.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/data.hpp
index 7770110dfa056..0c09a21bbc952 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/data.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/data.hpp
@@ -62,6 +62,6 @@ Eigen::Matrix3f fromYamlMatrix3f(YAML::Node x);
 
 int read_image(const std::string & image_names, std::vector<char> & raw_data);
 
-int read_sample(std::vector<std::string> & imgs_file, std::vector<std::vector<char>> & imgs_data);
+int read_sample(const std::vector<std::string> & imgs_file, std::vector<std::vector<char>> & imgs_data);
 
 #endif  // AUTOWARE__TENSORRT_BEVDET__DATA_HPP_
diff --git a/perception/autoware_tensorrt_bevdet/src/data.cpp b/perception/autoware_tensorrt_bevdet/src/data.cpp
index d3b88c7520149..e4720f7b207a7 100644
--- a/perception/autoware_tensorrt_bevdet/src/data.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/data.cpp
@@ -79,7 +79,7 @@ int read_image(const std::string & image_names, std::vector<char> & raw_data)
   return EXIT_SUCCESS;
 }
 
-int read_sample(std::vector<std::string> & imgs_file, std::vector<std::vector<char>> & imgs_data)
+int read_sample(const std::vector<std::string> & imgs_file, std::vector<std::vector<char>> & imgs_data)
 {
   imgs_data.resize(imgs_file.size());
 

From 63ffd19e6a1a2d448fd4432dafb68e070a059b9a Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 9 Aug 2024 08:34:44 +0000
Subject: [PATCH 24/85] style(pre-commit): autofix

---
 .../include/autoware/tensorrt_bevdet/data.hpp                  | 3 ++-
 perception/autoware_tensorrt_bevdet/src/data.cpp               | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/data.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/data.hpp
index 0c09a21bbc952..2afc30bd43f70 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/data.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/data.hpp
@@ -62,6 +62,7 @@ Eigen::Matrix3f fromYamlMatrix3f(YAML::Node x);
 
 int read_image(const std::string & image_names, std::vector<char> & raw_data);
 
-int read_sample(const std::vector<std::string> & imgs_file, std::vector<std::vector<char>> & imgs_data);
+int read_sample(
+  const std::vector<std::string> & imgs_file, std::vector<std::vector<char>> & imgs_data);
 
 #endif  // AUTOWARE__TENSORRT_BEVDET__DATA_HPP_
diff --git a/perception/autoware_tensorrt_bevdet/src/data.cpp b/perception/autoware_tensorrt_bevdet/src/data.cpp
index e4720f7b207a7..3b7783c4b8dc0 100644
--- a/perception/autoware_tensorrt_bevdet/src/data.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/data.cpp
@@ -79,7 +79,8 @@ int read_image(const std::string & image_names, std::vector<char> & raw_data)
   return EXIT_SUCCESS;
 }
 
-int read_sample(const std::vector<std::string> & imgs_file, std::vector<std::vector<char>> & imgs_data)
+int read_sample(
+  const std::vector<std::string> & imgs_file, std::vector<std::vector<char>> & imgs_data)
 {
   imgs_data.resize(imgs_file.size());
 

From 156e10f07b763028c228350f712e41472299b526 Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Mon, 12 Aug 2024 13:08:42 +0800
Subject: [PATCH 25/85] feat(autoware_tensorrt_bevdet): add new 3d object
 detection method

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 .../detection/detection.launch.xml            |  1 -
 .../detector/camera_bev_detector.launch.xml   |  5 ----
 .../autoware/tensorrt_bevdet/bevdet_node.hpp  |  7 +-----
 .../launch/tensorrt_bevdet.launch.xml         |  4 ----
 .../src/bevdet_node.cpp                       | 23 +++----------------
 5 files changed, 4 insertions(+), 36 deletions(-)

diff --git a/launch/tier4_perception_launch/launch/object_recognition/detection/detection.launch.xml b/launch/tier4_perception_launch/launch/object_recognition/detection/detection.launch.xml
index abd79bd75019b..100467dfbcd60 100644
--- a/launch/tier4_perception_launch/launch/object_recognition/detection/detection.launch.xml
+++ b/launch/tier4_perception_launch/launch/object_recognition/detection/detection.launch.xml
@@ -219,7 +219,6 @@
   <group if="$(var switch/detector/camera_bev)">
     <!-- Camera-based bev detectors-->
     <include file="$(find-pkg-share tier4_perception_launch)/launch/object_recognition/detection/detector/camera_bev_detector.launch.xml">
-      <arg name="input/pointcloud" value="$(var input/pointcloud)"/>
       <arg name="input/camera0/image" value="$(var input/camera0/image)"/>
       <arg name="input/camera0/info" value="$(var input/camera0/info)"/>
       <arg name="input/camera1/image" value="$(var input/camera1/image)"/>
diff --git a/launch/tier4_perception_launch/launch/object_recognition/detection/detector/camera_bev_detector.launch.xml b/launch/tier4_perception_launch/launch/object_recognition/detection/detector/camera_bev_detector.launch.xml
index 8ad5f3b965805..a61c96676dca4 100644
--- a/launch/tier4_perception_launch/launch/object_recognition/detection/detector/camera_bev_detector.launch.xml
+++ b/launch/tier4_perception_launch/launch/object_recognition/detection/detector/camera_bev_detector.launch.xml
@@ -1,8 +1,5 @@
 <?xml version="1.0"?>
 <launch>
-  <!-- Lidar parameters -->
-  <arg name="input/pointcloud"/>
-
   <!-- Camera parameters -->
   <arg name="input/camera0/image"/>
   <arg name="input/camera0/info"/>
@@ -32,14 +29,12 @@
     <push-ros-namespace namespace="bevdet"/>
     <group>
       <include file="$(find-pkg-share autoware_tensorrt_bevdet)/launch/tensorrt_bevdet.launch.xml">
-        <arg name="input/pointcloud" value="$(var input/pointcloud)"/>
         <arg name="input/img_fl" value="$(var input/camera0/image)"/>
         <arg name="input/img_f" value="$(var input/camera1/image)"/>
         <arg name="input/img_fr" value="$(var input/camera2/image)"/>
         <arg name="input/img_bl" value="$(var input/camera3/image)"/>
         <arg name="input/img_b" value="$(var input/camera4/image)"/>
         <arg name="input/img_br" value="$(var input/camera5/image)"/>
-        <arg name="output/pointcloud" value="points_painting"/>
         <arg name="output/boxes" value="boxes"/>
         <arg name="data_path" value="$(var data_path)/"/>
         <arg name="onnx_file" value="$(var onnx_file)"/>
diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
index 2e4fe5604c4dd..c03fa8b20c7da 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
@@ -51,8 +51,6 @@
 #include <thread>
 #include <vector>
 
-typedef pcl::PointXYZI PointT;
-
 uint8_t getSemanticType(const std::string & class_name);
 void Getinfo(void);
 
@@ -109,11 +107,9 @@ class TRTBEVDetNode : public rclcpp::Node
   uchar * imgs_dev_ = nullptr;
   float score_thre_;
 
-  rclcpp::Publisher<sensor_msgs::msg::PointCloud2>::SharedPtr pub_cloud_;
   rclcpp::Publisher<autoware_perception_msgs::msg::DetectedObjects>::SharedPtr
     pub_boxes_;  // ros2无该消息类型
 
-  message_filters::Subscriber<sensor_msgs::msg::PointCloud2> sub_cloud_;
   message_filters::Subscriber<sensor_msgs::msg::Image> sub_f_img_;
   message_filters::Subscriber<sensor_msgs::msg::Image> sub_fl_img_;
   message_filters::Subscriber<sensor_msgs::msg::Image> sub_fr_img_;
@@ -122,7 +118,7 @@ class TRTBEVDetNode : public rclcpp::Node
   message_filters::Subscriber<sensor_msgs::msg::Image> sub_br_img_;
 
   typedef message_filters::sync_policies::ApproximateTime<
-    sensor_msgs::msg::PointCloud2, sensor_msgs::msg::Image, sensor_msgs::msg::Image,
+    sensor_msgs::msg::Image, sensor_msgs::msg::Image,
     sensor_msgs::msg::Image, sensor_msgs::msg::Image, sensor_msgs::msg::Image,
     sensor_msgs::msg::Image>
     MySyncPolicy;
@@ -135,7 +131,6 @@ class TRTBEVDetNode : public rclcpp::Node
   ~TRTBEVDetNode();
 
   void callback(
-    const sensor_msgs::msg::PointCloud2::ConstSharedPtr & msg_cloud,
     const sensor_msgs::msg::Image::ConstSharedPtr & msg_fl_img,
     const sensor_msgs::msg::Image::ConstSharedPtr & msg_f_img,
     const sensor_msgs::msg::Image::ConstSharedPtr & msg_fr_img,
diff --git a/perception/autoware_tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml b/perception/autoware_tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml
index e0b69cfd3a3be..6f9fafedb86ea 100644
--- a/perception/autoware_tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml
+++ b/perception/autoware_tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml
@@ -1,14 +1,12 @@
 <?xml version="1.0"?>
 <launch>
   <!-- input topic name -->
-  <arg name="input/pointcloud" default="/lidar_top"/>
   <arg name="input/img_fl" default="/cam_front_left/raw"/>
   <arg name="input/img_f" default="/cam_front/raw"/>
   <arg name="input/img_fr" default="/cam_front_right/raw"/>
   <arg name="input/img_bl" default="/cam_back_left/raw"/>
   <arg name="input/img_b" default="/cam_back/raw"/>
   <arg name="input/img_br" default="/cam_back_right/raw"/>
-  <arg name="output/pointcloud" default="points_painting"/>
   <arg name="output/boxes" default="boxes"/>
 
   <arg name="data_path" default="$(env HOME)/autoware_data/autoware_tensorrt_bevdet"/>
@@ -21,7 +19,6 @@
   <!-- bevdet node -->
   <node pkg="autoware_tensorrt_bevdet" exec="autoware_tensorrt_bevdet_node" name="tensorrt_bevdet_node" output="screen">
     <!-- ros topic name -->
-    <remap from="~/input/topic_cloud" to="$(var input/pointcloud)"/>
     <remap from="~/input/topic_img_fl" to="$(var input/img_fl)"/>
     <remap from="~/input/topic_img_f" to="$(var input/img_f)"/>
     <remap from="~/input/topic_img_fr" to="$(var input/img_fr)"/>
@@ -29,7 +26,6 @@
     <remap from="~/input/topic_img_b" to="$(var input/img_b)"/>
     <remap from="~/input/topic_img_br" to="$(var input/img_br)"/>
 
-    <remap from="~/output/painting_cloud" to="$(var output/pointcloud)"/>
     <remap from="~/output/boxes" to="$(var output/boxes)"/>
 
     <param name="onnx_file" value="$(var onnx_file)"/>
diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
index eec70df016dd0..983ca63b1e321 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
@@ -129,7 +129,6 @@ TRTBEVDetNode::TRTBEVDetNode(
   using std::placeholders::_4;
   using std::placeholders::_5;
   using std::placeholders::_6;
-  using std::placeholders::_7;
 
   score_thre_ = this->declare_parameter<float>("post_process_params.score_thre", 0.2);
 
@@ -163,14 +162,9 @@ TRTBEVDetNode::TRTBEVDetNode(
   CHECK_CUDA(cudaMalloc(
     reinterpret_cast<void **>(&imgs_dev_), img_N_ * 3 * img_w_ * img_h_ * sizeof(uchar)));
 
-  pub_cloud_ = this->create_publisher<sensor_msgs::msg::PointCloud2>(
-    "~/output/painting_cloud", rclcpp::SensorDataQoS());
   pub_boxes_ = this->create_publisher<autoware_perception_msgs::msg::DetectedObjects>(
     "~/output/boxes", rclcpp::QoS{1});
 
-  sub_cloud_.subscribe(
-    this, "~/input/topic_cloud",
-    rclcpp::QoS{1}.get_rmw_qos_profile());  // rclcpp::QoS{1}.get_rmw_qos_profile()
   sub_f_img_.subscribe(
     this, "~/input/topic_img_f",
     rclcpp::QoS{1}.get_rmw_qos_profile());  // rmw_qos_profile_sensor_data
@@ -183,15 +177,14 @@ TRTBEVDetNode::TRTBEVDetNode(
   sub_br_img_.subscribe(this, "~/input/topic_img_br", rclcpp::QoS{1}.get_rmw_qos_profile());
 
   sync_ = std::make_shared<Sync>(
-    MySyncPolicy(10), sub_cloud_, sub_fl_img_, sub_f_img_, sub_fr_img_, sub_bl_img_, sub_b_img_,
+    MySyncPolicy(10), sub_fl_img_, sub_f_img_, sub_fr_img_, sub_bl_img_, sub_b_img_,
     sub_br_img_);
 
   sync_->registerCallback(
-    std::bind(&TRTBEVDetNode::callback, this, _1, _2, _3, _4, _5, _6, _7));  // 绑定回调函数
+    std::bind(&TRTBEVDetNode::callback, this, _1, _2, _3, _4, _5, _6));  // 绑定回调函数
 }
 
 void TRTBEVDetNode::callback(
-  const sensor_msgs::msg::PointCloud2::ConstSharedPtr & msg_cloud,
   const sensor_msgs::msg::Image::ConstSharedPtr & msg_fl_img,
   const sensor_msgs::msg::Image::ConstSharedPtr & msg_f_img,
   const sensor_msgs::msg::Image::ConstSharedPtr & msg_fr_img,
@@ -199,9 +192,6 @@ void TRTBEVDetNode::callback(
   const sensor_msgs::msg::Image::ConstSharedPtr & msg_b_img,
   const sensor_msgs::msg::Image::ConstSharedPtr & msg_br_img)
 {
-  pcl::PointCloud<PointT>::Ptr cloud(new pcl::PointCloud<PointT>);
-
-  pcl::fromROSMsg(*msg_cloud, *cloud);
 
   cv::Mat img_fl, img_f, img_fr, img_bl, img_b, img_br;
   std::vector<cv::Mat> imgs;
@@ -237,18 +227,11 @@ void TRTBEVDetNode::callback(
 
   autoware_perception_msgs::msg::DetectedObjects bevdet_objects;
   bevdet_objects.header.frame_id = "base_link";
-  bevdet_objects.header.stamp = msg_cloud->header.stamp;
+  bevdet_objects.header.stamp = msg_f_img->header.stamp;
 
   box3DToDetectedObjects(ego_boxes, bevdet_objects, class_names_, score_thre_);
 
   pub_boxes_->publish(bevdet_objects);
-
-  sensor_msgs::msg::PointCloud2 msg_cloud_painting;
-  pcl::toROSMsg(*cloud, msg_cloud_painting);
-
-  msg_cloud_painting.header.frame_id = msg_cloud->header.frame_id;
-  msg_cloud_painting.header.stamp = msg_cloud->header.stamp;
-  pub_cloud_->publish(msg_cloud_painting);
 }
 
 TRTBEVDetNode::~TRTBEVDetNode()

From 58d43569411e7c6a8ea73169f16b4e1d8ee9ca47 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 12 Aug 2024 05:11:44 +0000
Subject: [PATCH 26/85] style(pre-commit): autofix

---
 .../include/autoware/tensorrt_bevdet/bevdet_node.hpp          | 3 +--
 perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp       | 4 +---
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
index c03fa8b20c7da..696a85c3e5b26 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
@@ -118,9 +118,8 @@ class TRTBEVDetNode : public rclcpp::Node
   message_filters::Subscriber<sensor_msgs::msg::Image> sub_br_img_;
 
   typedef message_filters::sync_policies::ApproximateTime<
-    sensor_msgs::msg::Image, sensor_msgs::msg::Image,
     sensor_msgs::msg::Image, sensor_msgs::msg::Image, sensor_msgs::msg::Image,
-    sensor_msgs::msg::Image>
+    sensor_msgs::msg::Image, sensor_msgs::msg::Image, sensor_msgs::msg::Image>
     MySyncPolicy;
 
   typedef message_filters::Synchronizer<MySyncPolicy> Sync;
diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
index 983ca63b1e321..c7bdf2031a0a1 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
@@ -177,8 +177,7 @@ TRTBEVDetNode::TRTBEVDetNode(
   sub_br_img_.subscribe(this, "~/input/topic_img_br", rclcpp::QoS{1}.get_rmw_qos_profile());
 
   sync_ = std::make_shared<Sync>(
-    MySyncPolicy(10), sub_fl_img_, sub_f_img_, sub_fr_img_, sub_bl_img_, sub_b_img_,
-    sub_br_img_);
+    MySyncPolicy(10), sub_fl_img_, sub_f_img_, sub_fr_img_, sub_bl_img_, sub_b_img_, sub_br_img_);
 
   sync_->registerCallback(
     std::bind(&TRTBEVDetNode::callback, this, _1, _2, _3, _4, _5, _6));  // 绑定回调函数
@@ -192,7 +191,6 @@ void TRTBEVDetNode::callback(
   const sensor_msgs::msg::Image::ConstSharedPtr & msg_b_img,
   const sensor_msgs::msg::Image::ConstSharedPtr & msg_br_img)
 {
-
   cv::Mat img_fl, img_f, img_fr, img_bl, img_b, img_br;
   std::vector<cv::Mat> imgs;
   img_fl = cv_bridge::toCvShare(msg_fl_img, "bgr8")->image;

From 415be2e10b4eea2a8477098c8d12ecfeb8a0fd26 Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Tue, 13 Aug 2024 16:44:32 +0800
Subject: [PATCH 27/85] feat(autoware_tensorrt_bevdet): add new 3d object
 detection method

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 .../launch/object_recognition/detection/detection.launch.xml  | 2 --
 .../detection/detector/camera_bev_detector.launch.xml         | 4 +---
 launch/tier4_perception_launch/launch/perception.launch.xml   | 2 --
 3 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/launch/tier4_perception_launch/launch/object_recognition/detection/detection.launch.xml b/launch/tier4_perception_launch/launch/object_recognition/detection/detection.launch.xml
index 100467dfbcd60..d505e1a67088a 100644
--- a/launch/tier4_perception_launch/launch/object_recognition/detection/detection.launch.xml
+++ b/launch/tier4_perception_launch/launch/object_recognition/detection/detection.launch.xml
@@ -6,7 +6,6 @@
   <!-- Pipeline junctions -->
   <arg name="mode" description="options: `camera_lidar_radar_fusion`, `camera_lidar_fusion`, `lidar_radar_fusion`, `lidar` or `radar`"/>
   <arg name="lidar_detection_model" description="options: `transfusion`, `centerpoint`, `apollo`, `pointpainting`, `clustering`"/>
-  <arg name="bev_detection_model" default="bevdet" description="options: `bevdet`"/>
   <arg name="use_object_filter" description="use object filter"/>
   <arg name="objects_filter_method" description="options: `lanelet_filter` or `position_filter`"/>
   <arg name="use_pointcloud_map" description="use pointcloud map filter"/>
@@ -236,7 +235,6 @@
       <arg name="input/camera7/image" value="$(var input/camera7/image)"/>
       <arg name="input/camera7/info" value="$(var input/camera7/info)"/>
       <arg name="number_of_cameras" value="$(var number_of_cameras)"/>
-      <arg name="bev_detection_model" value="$(var bev_detection_model)"/>
     </include>
   </group>
   <!-- DetectionByTracker -->
diff --git a/launch/tier4_perception_launch/launch/object_recognition/detection/detector/camera_bev_detector.launch.xml b/launch/tier4_perception_launch/launch/object_recognition/detection/detector/camera_bev_detector.launch.xml
index a61c96676dca4..361e8103d7440 100644
--- a/launch/tier4_perception_launch/launch/object_recognition/detection/detector/camera_bev_detector.launch.xml
+++ b/launch/tier4_perception_launch/launch/object_recognition/detection/detector/camera_bev_detector.launch.xml
@@ -20,14 +20,12 @@
   <arg name="number_of_cameras"/>
 
   <!-- Model parameters -->
-  <arg name="bev_detection_model" default="bevdet" description="options: `bevdet`"/>
   <arg name="data_path" default="$(env HOME)/autoware_data/autoware_tensorrt_bevdet" description="packages data and artifacts directory path"/>
   <arg name="onnx_file" default="$(var data_path)/bevdet_one_lt_d.onnx" description="onnx model file path"/>
   <arg name="engine_file" default="$(var data_path)/bevdet_one_lt_d.engine" description="engine model file path"/>
   <!-- BEVDet -->
-  <group if="$(eval &quot;'$(var bev_detection_model)'=='bevdet'&quot;)">
-    <push-ros-namespace namespace="bevdet"/>
     <group>
+      <push-ros-namespace namespace="bevdet"/>
       <include file="$(find-pkg-share autoware_tensorrt_bevdet)/launch/tensorrt_bevdet.launch.xml">
         <arg name="input/img_fl" value="$(var input/camera0/image)"/>
         <arg name="input/img_f" value="$(var input/camera1/image)"/>
diff --git a/launch/tier4_perception_launch/launch/perception.launch.xml b/launch/tier4_perception_launch/launch/perception.launch.xml
index 2b7ae21bd3fe6..71c7544f4dd94 100644
--- a/launch/tier4_perception_launch/launch/perception.launch.xml
+++ b/launch/tier4_perception_launch/launch/perception.launch.xml
@@ -48,7 +48,6 @@
   <arg name="mode" default="camera_lidar_fusion" description="options: `camera_lidar_radar_fusion`, `camera_lidar_fusion`, `lidar_radar_fusion`, `lidar` or `radar`"/>
   <arg name="data_path" default="$(env HOME)/autoware_data" description="packages data and artifacts directory path"/>
   <arg name="lidar_detection_model" default="centerpoint" description="options: `transfusion`, `centerpoint`, `apollo`, `pointpainting`, `clustering`"/>
-  <arg name="bev_detection_model" default="bevdet" description="options: `bevdet`"/>
   <arg name="image_raw0" default="/sensing/camera/camera0/image_rect_color" description="image raw topic name"/>
   <arg name="camera_info0" default="/sensing/camera/camera0/camera_info" description="camera info topic name"/>
   <arg name="detection_rois0" default="/perception/object_recognition/detection/rois0" description="detection rois output topic name"/>
@@ -199,7 +198,6 @@
         <include file="$(find-pkg-share tier4_perception_launch)/launch/object_recognition/detection/detection.launch.xml">
           <arg name="mode" value="$(var mode)"/>
           <arg name="lidar_detection_model" value="$(var lidar_detection_model)"/>
-          <arg name="bev_detection_model" value="$(var bev_detection_model)"/>
           <arg name="use_multi_channel_tracker_merger" value="$(var use_multi_channel_tracker_merger)"/>
           <arg name="number_of_cameras" value="$(var image_number)"/>
           <arg name="input/camera0/image" value="$(var image_raw0)"/>

From 8c28774addb8317085fd2b51a53609764bad7128 Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Tue, 13 Aug 2024 17:04:27 +0800
Subject: [PATCH 28/85] feat(autoware_tensorrt_bevdet): add new 3d object
 detection method

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 .../object_recognition/detection/detection.launch.xml       | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/launch/tier4_perception_launch/launch/object_recognition/detection/detection.launch.xml b/launch/tier4_perception_launch/launch/object_recognition/detection/detection.launch.xml
index d505e1a67088a..38712790feeb0 100644
--- a/launch/tier4_perception_launch/launch/object_recognition/detection/detection.launch.xml
+++ b/launch/tier4_perception_launch/launch/object_recognition/detection/detection.launch.xml
@@ -66,7 +66,6 @@
   <arg name="switch/detector/lidar_rule" default="false"/>
   <arg name="switch/detector/radar" default="false"/>
   <arg name="switch/detector/radar_only" default="false"/>
-  <arg name="switch/detector/camera_bev" default="false"/>
   <arg name="switch/detector/tracker_based" default="false"/>
   <let name="switch/detector/tracker_based" value="true" if="$(var use_detection_by_tracker)"/>
 
@@ -124,9 +123,6 @@
     <let name="switch/detector/radar_only" value="true" unless="$(var use_multi_channel_tracker_merger)"/>
     <let name="switch/detector/tracker_based" value="false"/>
   </group>
-  <group scoped="false" if="$(eval '&quot;$(var mode)&quot;==&quot;camera&quot;')">
-    <let name="switch/detector/camera_bev" value="true"/>
-  </group>
 
   <!-- Detector -->
   <group if="$(var switch/detector/camera_lidar)">
@@ -215,7 +211,7 @@
       <arg name="radar_object_clustering_param_path" value="$(var radar_object_clustering_param_path)"/>
     </include>
   </group>
-  <group if="$(var switch/detector/camera_bev)">
+  <group if="$(eval '&quot;$(var mode)&quot;==&quot;camera&quot;')">
     <!-- Camera-based bev detectors-->
     <include file="$(find-pkg-share tier4_perception_launch)/launch/object_recognition/detection/detector/camera_bev_detector.launch.xml">
       <arg name="input/camera0/image" value="$(var input/camera0/image)"/>

From a5da3effd8a92e013bb4cea9f38d9e2495e7b1c8 Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Tue, 13 Aug 2024 17:47:43 +0800
Subject: [PATCH 29/85] feat(autoware_tensorrt_bevdet): add new 3d object
 detection method

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 .../autoware_tensorrt_bevdet/config/bevdet.param.yaml     | 2 +-
 .../autoware_tensorrt_bevdet/src/alignbev_plugin.cu       | 8 ++++----
 perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp   | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/config/bevdet.param.yaml b/perception/autoware_tensorrt_bevdet/config/bevdet.param.yaml
index 433d9caa97b64..193c7ad272236 100644
--- a/perception/autoware_tensorrt_bevdet/config/bevdet.param.yaml
+++ b/perception/autoware_tensorrt_bevdet/config/bevdet.param.yaml
@@ -7,5 +7,5 @@
         cams: ["CAM_FRONT_LEFT", "CAM_FRONT", "CAM_FRONT_RIGHT", "CAM_BACK_LEFT", "CAM_BACK", "CAM_BACK_RIGHT"]
       post_process_params:
         # post-process params
-        score_thre: 0.2
+        score_threshold: 0.2
         class_names: ["car", "truck", "construction_vehicle", "bus", "trailer", "barrier", "motorcycle", "bicycle", "pedestrian", "traffic_cone"]
diff --git a/perception/autoware_tensorrt_bevdet/src/alignbev_plugin.cu b/perception/autoware_tensorrt_bevdet/src/alignbev_plugin.cu
index 8127931d2a3b5..ecd51517f0de1 100644
--- a/perception/autoware_tensorrt_bevdet/src/alignbev_plugin.cu
+++ b/perception/autoware_tensorrt_bevdet/src/alignbev_plugin.cu
@@ -188,13 +188,13 @@ int32_t AlignBEVPlugin::enqueue(
   switch (int(outputDesc[0].type)) {
     case int(DataType::kFLOAT):
       if (inputDesc[0].type == DataType::kFLOAT) {
-        // printf("align : fp32, fp32\n");
+        // align : fp32, fp32;
         align_bev_kernel<float, float><<<GET_BLOCKS(count), NUM_THREADS, 0, stream>>>(
           count, reinterpret_cast<const float *>(inputs[0]),
           reinterpret_cast<const float *>(inputs[1]), reinterpret_cast<float *>(outputs[0]),
           output_desc);
       } else {
-        // printf("align : fp16, fp32\n");
+        // align : fp16, fp32
         align_bev_kernel<__half, float><<<GET_BLOCKS(count), NUM_THREADS, 0, stream>>>(
           count, reinterpret_cast<const __half *>(inputs[0]),
           reinterpret_cast<const float *>(inputs[1]), reinterpret_cast<float *>(outputs[0]),
@@ -203,13 +203,13 @@ int32_t AlignBEVPlugin::enqueue(
       break;
     case int(DataType::kHALF):
       if (inputDesc[0].type == DataType::kFLOAT) {
-        // printf("align : fp32, fp16\n");
+        // align : fp32, fp16
         align_bev_kernel<float, __half><<<GET_BLOCKS(count), NUM_THREADS, 0, stream>>>(
           count, reinterpret_cast<const float *>(inputs[0]),
           reinterpret_cast<const float *>(inputs[1]), reinterpret_cast<__half *>(outputs[0]),
           output_desc);
       } else {
-        // printf("align : fp16, fp16\n");
+        // align : fp16, fp16
         align_bev_kernel<__half, __half><<<GET_BLOCKS(count), NUM_THREADS, 0, stream>>>(
           count, reinterpret_cast<const __half *>(inputs[0]),
           reinterpret_cast<const float *>(inputs[1]), reinterpret_cast<__half *>(outputs[0]),
diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
index c7bdf2031a0a1..c2969da2514f8 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
@@ -130,7 +130,7 @@ TRTBEVDetNode::TRTBEVDetNode(
   using std::placeholders::_5;
   using std::placeholders::_6;
 
-  score_thre_ = this->declare_parameter<float>("post_process_params.score_thre", 0.2);
+  score_thre_ = this->declare_parameter<float>("post_process_params.score_threshold", 0.2);
 
   img_N_ = this->declare_parameter<int>("data_params.N", 6);     // camera num 6
   img_w_ = this->declare_parameter<int>("data_params.W", 1600);  // W: 1600

From a33aa0bba1c28c00aba2dd629d9c86fdc78ad9c7 Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Tue, 13 Aug 2024 17:53:55 +0800
Subject: [PATCH 30/85] feat(autoware_tensorrt_bevdet): add new 3d object
 detection method

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 .../detector/camera_bev_detector.launch.xml   | 29 +++++++++----------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/launch/tier4_perception_launch/launch/object_recognition/detection/detector/camera_bev_detector.launch.xml b/launch/tier4_perception_launch/launch/object_recognition/detection/detector/camera_bev_detector.launch.xml
index 361e8103d7440..bad876765051d 100644
--- a/launch/tier4_perception_launch/launch/object_recognition/detection/detector/camera_bev_detector.launch.xml
+++ b/launch/tier4_perception_launch/launch/object_recognition/detection/detector/camera_bev_detector.launch.xml
@@ -24,20 +24,19 @@
   <arg name="onnx_file" default="$(var data_path)/bevdet_one_lt_d.onnx" description="onnx model file path"/>
   <arg name="engine_file" default="$(var data_path)/bevdet_one_lt_d.engine" description="engine model file path"/>
   <!-- BEVDet -->
-    <group>
-      <push-ros-namespace namespace="bevdet"/>
-      <include file="$(find-pkg-share autoware_tensorrt_bevdet)/launch/tensorrt_bevdet.launch.xml">
-        <arg name="input/img_fl" value="$(var input/camera0/image)"/>
-        <arg name="input/img_f" value="$(var input/camera1/image)"/>
-        <arg name="input/img_fr" value="$(var input/camera2/image)"/>
-        <arg name="input/img_bl" value="$(var input/camera3/image)"/>
-        <arg name="input/img_b" value="$(var input/camera4/image)"/>
-        <arg name="input/img_br" value="$(var input/camera5/image)"/>
-        <arg name="output/boxes" value="boxes"/>
-        <arg name="data_path" value="$(var data_path)/"/>
-        <arg name="onnx_file" value="$(var onnx_file)"/>
-        <arg name="engine_file" value="$(var engine_file)"/>
-      </include>
-    </group>
+  <group>
+    <push-ros-namespace namespace="bevdet"/>
+    <include file="$(find-pkg-share autoware_tensorrt_bevdet)/launch/tensorrt_bevdet.launch.xml">
+      <arg name="input/img_fl" value="$(var input/camera0/image)"/>
+      <arg name="input/img_f" value="$(var input/camera1/image)"/>
+      <arg name="input/img_fr" value="$(var input/camera2/image)"/>
+      <arg name="input/img_bl" value="$(var input/camera3/image)"/>
+      <arg name="input/img_b" value="$(var input/camera4/image)"/>
+      <arg name="input/img_br" value="$(var input/camera5/image)"/>
+      <arg name="output/boxes" value="boxes"/>
+      <arg name="data_path" value="$(var data_path)/"/>
+      <arg name="onnx_file" value="$(var onnx_file)"/>
+      <arg name="engine_file" value="$(var engine_file)"/>
+    </include>
   </group>
 </launch>

From 03f24cb5c3097c8b9e3e47853fe9e53c3d6ac57f Mon Sep 17 00:00:00 2001
From: Yuxuan Liu <619684051@qq.com>
Date: Wed, 14 Aug 2024 15:30:27 +0900
Subject: [PATCH 31/85] use an optimized image_transport

Signed-off-by: Yuxuan Liu <619684051@qq.com>
---
 .../src/bevdet_node.cpp                       | 24 +++++++++++++++----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
index c2969da2514f8..835d7a8c5eecf 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
@@ -12,6 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "autoware/tensorrt_bevdet/bevdet_node.hpp"
+#include "autoware/tensorrt_bevdet/preprocess.hpp"
+#include <chrono>
 
 using Label = autoware_perception_msgs::msg::ObjectClassification;
 std::map<int, std::vector<int>> colormap{
@@ -183,6 +185,22 @@ TRTBEVDetNode::TRTBEVDetNode(
     std::bind(&TRTBEVDetNode::callback, this, _1, _2, _3, _4, _5, _6));  // 绑定回调函数
 }
 
+void image_transport(std::vector<cv::Mat> imgs, uchar * out_imgs, size_t width, size_t height)
+{
+  uchar * temp = new uchar[width * height * 3];
+  uchar * temp_gpu = nullptr;
+  CHECK_CUDA(cudaMalloc(&temp_gpu, width * height * 3));
+
+  for (size_t i = 0; i < imgs.size(); i++) {
+    cv::cvtColor(imgs[i], imgs[i], cv::COLOR_BGR2RGB);
+    CHECK_CUDA(cudaMemcpy(temp_gpu, imgs[i].data, width * height * 3, cudaMemcpyHostToDevice));
+    convert_RGBHWC_to_BGRCHW(temp_gpu, out_imgs + i * width * height * 3, 3, height, width);
+
+  }
+  delete[] temp;
+  CHECK_CUDA(cudaFree(temp_gpu));
+}
+
 void TRTBEVDetNode::callback(
   const sensor_msgs::msg::Image::ConstSharedPtr & msg_fl_img,
   const sensor_msgs::msg::Image::ConstSharedPtr & msg_f_img,
@@ -208,11 +226,7 @@ void TRTBEVDetNode::callback(
   imgs.emplace_back(img_br);
 
   std::vector<std::vector<char>> imgs_data;
-  // Mat -> Vetor<char>
-  cvImgToArr(imgs, imgs_data);
-
-  // cpu imgs_data -> gpu imgs_dev
-  decode_cpu(imgs_data, imgs_dev_, img_w_, img_h_);
+  image_transport(imgs, imgs_dev_, img_w_, img_h_);
 
   // uchar *imgs_dev
   sampleData_.imgs_dev = imgs_dev_;

From 67ed09a169678e934a7f4a2d4215f0f120868aa9 Mon Sep 17 00:00:00 2001
From: YuxuanLiuTier4Desktop <619684051@qq.com>
Date: Wed, 14 Aug 2024 16:18:03 +0900
Subject: [PATCH 32/85] clean up un-needed include

Signed-off-by: YuxuanLiuTier4Desktop <619684051@qq.com>
---
 perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
index 835d7a8c5eecf..f4ec73a9a6e45 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
@@ -13,7 +13,6 @@
 // limitations under the License.
 #include "autoware/tensorrt_bevdet/bevdet_node.hpp"
 #include "autoware/tensorrt_bevdet/preprocess.hpp"
-#include <chrono>
 
 using Label = autoware_perception_msgs::msg::ObjectClassification;
 std::map<int, std::vector<int>> colormap{

From 5d17ef9a5454f76aa908527cda47d93987deb325 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 15 Aug 2024 02:00:18 +0000
Subject: [PATCH 33/85] style(pre-commit): autofix

---
 perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
index f4ec73a9a6e45..5d902bcbb5030 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
@@ -12,6 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "autoware/tensorrt_bevdet/bevdet_node.hpp"
+
 #include "autoware/tensorrt_bevdet/preprocess.hpp"
 
 using Label = autoware_perception_msgs::msg::ObjectClassification;
@@ -194,7 +195,6 @@ void image_transport(std::vector<cv::Mat> imgs, uchar * out_imgs, size_t width,
     cv::cvtColor(imgs[i], imgs[i], cv::COLOR_BGR2RGB);
     CHECK_CUDA(cudaMemcpy(temp_gpu, imgs[i].data, width * height * 3, cudaMemcpyHostToDevice));
     convert_RGBHWC_to_BGRCHW(temp_gpu, out_imgs + i * width * height * 3, 3, height, width);
-
   }
   delete[] temp;
   CHECK_CUDA(cudaFree(temp_gpu));

From 1fbf791a04eef6a0f763d1ff171238c02858cdab Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Thu, 15 Aug 2024 10:23:44 +0800
Subject: [PATCH 34/85] feat(autoware_tensorrt_bevdet): add new 3d object
 detection method

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
index 5d902bcbb5030..7fc6ff05e3133 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
@@ -182,7 +182,7 @@ TRTBEVDetNode::TRTBEVDetNode(
     MySyncPolicy(10), sub_fl_img_, sub_f_img_, sub_fr_img_, sub_bl_img_, sub_b_img_, sub_br_img_);
 
   sync_->registerCallback(
-    std::bind(&TRTBEVDetNode::callback, this, _1, _2, _3, _4, _5, _6));  // 绑定回调函数
+    std::bind(&TRTBEVDetNode::callback, this, _1, _2, _3, _4, _5, _6));
 }
 
 void image_transport(std::vector<cv::Mat> imgs, uchar * out_imgs, size_t width, size_t height)
@@ -224,7 +224,6 @@ void TRTBEVDetNode::callback(
   imgs.emplace_back(img_b);
   imgs.emplace_back(img_br);
 
-  std::vector<std::vector<char>> imgs_data;
   image_transport(imgs, imgs_dev_, img_w_, img_h_);
 
   // uchar *imgs_dev

From e1a57966df08db6c78f0f0a807ff6b78d5c1d7d8 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 15 Aug 2024 02:29:15 +0000
Subject: [PATCH 35/85] style(pre-commit): autofix

---
 perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
index 7fc6ff05e3133..678a29f168c17 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
@@ -181,8 +181,7 @@ TRTBEVDetNode::TRTBEVDetNode(
   sync_ = std::make_shared<Sync>(
     MySyncPolicy(10), sub_fl_img_, sub_f_img_, sub_fr_img_, sub_bl_img_, sub_b_img_, sub_br_img_);
 
-  sync_->registerCallback(
-    std::bind(&TRTBEVDetNode::callback, this, _1, _2, _3, _4, _5, _6));
+  sync_->registerCallback(std::bind(&TRTBEVDetNode::callback, this, _1, _2, _3, _4, _5, _6));
 }
 
 void image_transport(std::vector<cv::Mat> imgs, uchar * out_imgs, size_t width, size_t height)

From 9b475dde2983a6cd75d2c022e80c9990c222e23a Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Thu, 15 Aug 2024 12:15:53 +0800
Subject: [PATCH 36/85] feat(autoware_tensorrt_bevdet): add new 3d object
 detection method

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 .../autoware/tensorrt_bevdet/bevdet_node.hpp  | 27 --------------
 .../autoware_tensorrt_bevdet/src/bevdet.cpp   | 36 ++++++++++---------
 .../src/bevdet_node.cpp                       | 26 --------------
 3 files changed, 20 insertions(+), 69 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
index 696a85c3e5b26..242139eb07849 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
@@ -52,38 +52,11 @@
 #include <vector>
 
 uint8_t getSemanticType(const std::string & class_name);
-void Getinfo(void);
 
 void box3DToDetectedObjects(
   const std::vector<Box> & boxes, autoware_perception_msgs::msg::DetectedObjects & objects,
   const std::vector<std::string> & class_names, float score_thre, const bool has_twist);
 
-// opencv Mat-> std::vector<char>
-int cvToArr(cv::Mat img, std::vector<char> & raw_data)
-{
-  if (img.empty()) {
-    std::cerr << "image is empty. " << std::endl;
-    return EXIT_FAILURE;
-  }
-
-  std::vector<u_char> raw_data_;
-  cv::imencode(".jpg", img, raw_data_);
-  raw_data = std::vector<char>(raw_data_.begin(), raw_data_.end());
-  return EXIT_SUCCESS;
-}
-
-int cvImgToArr(std::vector<cv::Mat> & imgs, std::vector<std::vector<char>> & imgs_data)
-{
-  imgs_data.resize(imgs.size());
-
-  for (size_t i = 0; i < imgs_data.size(); i++) {
-    if (cvToArr(imgs[i], imgs_data[i])) {
-      return EXIT_FAILURE;
-    }
-  }
-  return EXIT_SUCCESS;
-}
-
 class TRTBEVDetNode : public rclcpp::Node
 {
 private:
diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
index c956534867798..e5597ab461be9 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
@@ -38,7 +38,7 @@ BEVDet::BEVDet(
 {
   InitParams(config_file);
   if (n_img != N_img) {
-    printf("BEVDet need %d images, but given %d images!", N_img, n_img);
+    std::cerr << "BEVDet needs " << N_img << " images, but " << n_img << " images were given!" << std::endl;
   }
   auto start = high_resolution_clock::now();
 
@@ -52,13 +52,13 @@ BEVDet::BEVDet(
     ranks_bev_ptr, ranks_depth_ptr, ranks_feat_ptr, interval_starts_ptr, interval_lengths_ptr);
   auto end = high_resolution_clock::now();
   duration<float> t = end - start;
-  printf("InitVewTransformer cost time : %.4lf ms\n", t.count() * 1000);
+  std::cout << "InitVewTransformer cost time: " << t.count() * 1000 << " ms" << std::endl;
 
   if (access(engine_file.c_str(), F_OK) == 0) {
-    printf("Inference engine prepared.");
+    std::cout << "Inference engine prepared." << std::endl;
   } else {
     // onnx to engine
-    printf("Could not find %s, try making TensorRT engine from onnx", engine_file.c_str());
+    std::cerr << "Could not find " << engine_file << ", trying to make TensorRT engine from ONNX." << std::endl;
     ExportEngine(onnx_file, engine_file);
   }
   InitEngine(engine_file);  // FIXME
@@ -129,7 +129,6 @@ void BEVDet::InitCamParams(
   CHECK_CUDA(cudaMemcpy(
     trt_buffer_dev[buffer_map["cam_params"]], cam_params_host,
     trt_buffer_sizes[buffer_map["cam_params"]], cudaMemcpyHostToDevice));
-  // printf("trans : %d cam : %d\n", transform_size, cam_params_size);
 }
 
 void BEVDet::InitParams(const std::string & config_file)
@@ -376,14 +375,19 @@ void BEVDet::InitViewTransformer(
   interval_starts_ptr.reset(interval_starts_host_ptr);
   interval_lengths_ptr.reset(interval_lengths_host_ptr);
 
-  printf("valid_feat_num: %d \n", valid_feat_num);
-  printf("unique_bev_num: %d \n", unique_bev_num);
+  std::cout << "valid_feat_num: " << valid_feat_num << std::endl;
+  std::cout << "unique_bev_num: " << unique_bev_num << std::endl;
 }
 
 void print_dim(nvinfer1::Dims dim)
 {
   for (auto i = 0; i < dim.nbDims; i++) {
-    printf("%d%c", dim.d[i], i == dim.nbDims - 1 ? '\n' : ' ');
+    std::cout << dim.d[i];
+    if (i == dim.nbDims - 1) {
+      std::cout << std::endl;
+    } else {
+      std::cout << " ";
+    }
   }
 }
 
@@ -469,9 +473,9 @@ int BEVDet::DeserializeTRTEngine(
   *engine_ptr = engine;
   for (int bi = 0; bi < engine->getNbBindings(); bi++) {
     if (engine->bindingIsInput(bi) == true) {
-      printf("Binding %d (%s): Input. \n", bi, engine->getBindingName(bi));
+      std::cout << "Binding " << bi << " (" << engine->getBindingName(bi) << "): Input." << std::endl;
     } else {
-      printf("Binding %d (%s): Output. \n", bi, engine->getBindingName(bi));
+      std::cout << "Binding " << bi << " (" << engine->getBindingName(bi) << "): Output." << std::endl;
     }
   }
   return EXIT_SUCCESS;
@@ -570,7 +574,7 @@ void BEVDet::GetCurr2AdjTransform(
 int BEVDet::DoInfer(
   const camsData & cam_data, std::vector<Box> & out_detections, float & cost_time, int idx)
 {
-  printf("-------------------%d-------------------\n", idx + 1);
+  std::cout << "-------------------" << (idx + 1) << "-------------------" << std::endl;
 
   auto start = high_resolution_clock::now();
   CHECK_CUDA(cudaMemcpy(
@@ -584,7 +588,7 @@ int BEVDet::DoInfer(
     cam_data.param.scene_token, cam_data.param.ego2global_rot, cam_data.param.ego2global_trans);
 
   if (!trt_context->executeV2(trt_buffer_dev)) {
-    printf("BEVDet forward failing!\n");
+    std::cerr << "BEVDet forward failing!" << std::endl;
   }
 
   adj_frame_ptr->saveFrameBuffer(
@@ -603,11 +607,11 @@ int BEVDet::DoInfer(
   duration<double> post_t = post_end - end;
 
   cost_time = infer_t.count() * 1000;
-  printf("TRT-Engine  : %.5lf ms\n", engine_t.count() * 1000);
-  printf("Postprocess : %.5lf ms\n", post_t.count() * 1000);
-  printf("Inference   : %.5lf ms\n", infer_t.count() * 1000);
+  std::cout << "TRT-Engine  : " << engine_t.count() * 1000 << " ms" << std::endl;
+  std::cout << "Postprocess : " << post_t.count() * 1000 << " ms" << std::endl;
+  std::cout << "Inference   : " << infer_t.count() * 1000 << " ms" << std::endl;
 
-  printf("Detect %ld objects\n", out_detections.size());
+  std::cout << "Detect " << out_detections.size() << " objects" << std::endl;
 
   return EXIT_SUCCESS;
 }
diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
index 678a29f168c17..d6124026b4302 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
@@ -42,32 +42,6 @@ uint8_t getSemanticType(const std::string & class_name)
   }
 }
 
-void Getinfo(void)
-{
-  cudaDeviceProp prop;
-
-  int count = 0;
-  cudaGetDeviceCount(&count);
-  printf("\nGPU has cuda devices: %d\n", count);
-  for (int i = 0; i < count; ++i) {
-    cudaGetDeviceProperties(&prop, i);
-    printf("----device id: %d info----\n", i);
-    printf("  GPU : %s \n", prop.name);
-    printf("  Capbility: %d.%d\n", prop.major, prop.minor);
-    printf("  Global memory: %luMB\n", prop.totalGlobalMem >> 20);
-    printf("  Const memory: %luKB\n", prop.totalConstMem >> 10);
-    printf("  Shared memory in a block: %luKB\n", prop.sharedMemPerBlock >> 10);
-    printf("  warp size: %d\n", prop.warpSize);
-    printf("  threads in a block: %d\n", prop.maxThreadsPerBlock);
-    printf(
-      "  block dim: (%d,%d,%d)\n", prop.maxThreadsDim[0], prop.maxThreadsDim[1],
-      prop.maxThreadsDim[2]);
-    printf(
-      "  grid dim: (%d,%d,%d)\n", prop.maxGridSize[0], prop.maxGridSize[1], prop.maxGridSize[2]);
-  }
-  printf("\n");
-}
-
 void box3DToDetectedObjects(
   const std::vector<Box> & boxes, autoware_perception_msgs::msg::DetectedObjects & bevdet_objects,
   const std::vector<std::string> & class_names, float score_thre, const bool has_twist = true)

From 8253b07d3a921aeadeb4ecd526038b0c2f9d87dc Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 15 Aug 2024 04:17:59 +0000
Subject: [PATCH 37/85] style(pre-commit): autofix

---
 perception/autoware_tensorrt_bevdet/src/bevdet.cpp | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
index e5597ab461be9..670bc3a77b65d 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
@@ -38,7 +38,8 @@ BEVDet::BEVDet(
 {
   InitParams(config_file);
   if (n_img != N_img) {
-    std::cerr << "BEVDet needs " << N_img << " images, but " << n_img << " images were given!" << std::endl;
+    std::cerr << "BEVDet needs " << N_img << " images, but " << n_img << " images were given!"
+              << std::endl;
   }
   auto start = high_resolution_clock::now();
 
@@ -58,7 +59,8 @@ BEVDet::BEVDet(
     std::cout << "Inference engine prepared." << std::endl;
   } else {
     // onnx to engine
-    std::cerr << "Could not find " << engine_file << ", trying to make TensorRT engine from ONNX." << std::endl;
+    std::cerr << "Could not find " << engine_file << ", trying to make TensorRT engine from ONNX."
+              << std::endl;
     ExportEngine(onnx_file, engine_file);
   }
   InitEngine(engine_file);  // FIXME
@@ -473,9 +475,11 @@ int BEVDet::DeserializeTRTEngine(
   *engine_ptr = engine;
   for (int bi = 0; bi < engine->getNbBindings(); bi++) {
     if (engine->bindingIsInput(bi) == true) {
-      std::cout << "Binding " << bi << " (" << engine->getBindingName(bi) << "): Input." << std::endl;
+      std::cout << "Binding " << bi << " (" << engine->getBindingName(bi) << "): Input."
+                << std::endl;
     } else {
-      std::cout << "Binding " << bi << " (" << engine->getBindingName(bi) << "): Output." << std::endl;
+      std::cout << "Binding " << bi << " (" << engine->getBindingName(bi) << "): Output."
+                << std::endl;
     }
   }
   return EXIT_SUCCESS;

From 2b8efc3238df75e68d85d667656ab3065490e7d4 Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Thu, 15 Aug 2024 22:51:50 +0800
Subject: [PATCH 38/85] Replace printf with std::cout or std::cerr

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 .../autoware/tensorrt_bevdet/common.hpp       |  2 +-
 .../autoware/tensorrt_bevdet/iou3d_nms.hpp    |  1 +
 .../tensorrt_bevdet/nvjpegdecoder.hpp         |  3 +-
 .../src/alignbev_plugin.cu                    |  2 +-
 .../autoware_tensorrt_bevdet/src/bevdet.cpp   |  3 +-
 .../src/bevpool_plugin.cu                     | 14 ++---
 .../src/cpu_jpegdecoder.cpp                   | 10 ++--
 .../src/gatherbev_plugin.cu                   |  9 +--
 .../autoware_tensorrt_bevdet/src/iou3d_nms.cu | 60 +++++++++++--------
 .../src/nvjpegdecoder.cpp                     |  2 +-
 .../src/postprocess.cu                        |  7 ++-
 .../src/preprocess_plugin.cu                  |  6 +-
 12 files changed, 64 insertions(+), 55 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/common.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/common.hpp
index 8d3ad49521a7c..3b5f6c2646dac 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/common.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/common.hpp
@@ -38,7 +38,7 @@ typedef unsigned char uchar;
 inline void GPUAssert(cudaError_t code, const char * file, int line, bool abort = true)
 {
   if (code != cudaSuccess) {
-    fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
+    std::cerr << "GPUassert: " << cudaGetErrorString(code) << " " << file << " " << line << std::endl;
     if (abort) exit(code);
   }
 };
diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/iou3d_nms.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/iou3d_nms.hpp
index 5a95be4465317..5ff94c25be7e4 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/iou3d_nms.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/iou3d_nms.hpp
@@ -32,6 +32,7 @@ All Rights Reserved 2019-2022.
 #include <thrust/transform.h>
 
 #include <iostream>
+#include <iomanip>
 #include <vector>
 
 const int THREADS_PER_BLOCK = 16;
diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/nvjpegdecoder.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/nvjpegdecoder.hpp
index a9cc280a2b5a0..ef1ade3845143 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/nvjpegdecoder.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/nvjpegdecoder.hpp
@@ -26,6 +26,7 @@
 #include <algorithm>
 #include <fstream>
 #include <iostream>
+#include <iomanip>
 #include <sstream>
 #include <string>
 #include <thread>
@@ -45,7 +46,7 @@
 inline void NvJpegAssert(nvjpegStatus_t code, const char * file, int line)
 {
   if (code != NVJPEG_STATUS_SUCCESS) {
-    std::cout << "NVJPEG failure: '#" << code << "' at " << std::string(file) << ":" << line
+    std::cerr << "NVJPEG failure: '#" << code << "' at " << std::string(file) << ":" << line
               << std::endl;
     exit(1);
   }
diff --git a/perception/autoware_tensorrt_bevdet/src/alignbev_plugin.cu b/perception/autoware_tensorrt_bevdet/src/alignbev_plugin.cu
index ecd51517f0de1..3218aa72befd8 100644
--- a/perception/autoware_tensorrt_bevdet/src/alignbev_plugin.cu
+++ b/perception/autoware_tensorrt_bevdet/src/alignbev_plugin.cu
@@ -217,7 +217,7 @@ int32_t AlignBEVPlugin::enqueue(
       }
       break;
     default:  // should NOT be here
-      printf("\tUnsupport datatype!\n");
+      std::cerr << "\tUnsupported datatype!" << std::endl;
   }
 
   return 0;
diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
index 670bc3a77b65d..46808dbcbfa57 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
@@ -53,7 +53,7 @@ BEVDet::BEVDet(
     ranks_bev_ptr, ranks_depth_ptr, ranks_feat_ptr, interval_starts_ptr, interval_lengths_ptr);
   auto end = high_resolution_clock::now();
   duration<float> t = end - start;
-  std::cout << "InitVewTransformer cost time: " << t.count() * 1000 << " ms" << std::endl;
+  std::cout << std::fixed << std::setprecision(4) << "InitVewTransformer cost time: " << t.count() * 1000 << " ms" << std::endl;
 
   if (access(engine_file.c_str(), F_OK) == 0) {
     std::cout << "Inference engine prepared." << std::endl;
@@ -611,6 +611,7 @@ int BEVDet::DoInfer(
   duration<double> post_t = post_end - end;
 
   cost_time = infer_t.count() * 1000;
+  std::cout << std::fixed << std::setprecision(5); // Set precision for all floating-point outputs
   std::cout << "TRT-Engine  : " << engine_t.count() * 1000 << " ms" << std::endl;
   std::cout << "Postprocess : " << post_t.count() * 1000 << " ms" << std::endl;
   std::cout << "Inference   : " << infer_t.count() * 1000 << " ms" << std::endl;
diff --git a/perception/autoware_tensorrt_bevdet/src/bevpool_plugin.cu b/perception/autoware_tensorrt_bevdet/src/bevpool_plugin.cu
index d9d1b5f94211c..ac8b0c917b92a 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevpool_plugin.cu
+++ b/perception/autoware_tensorrt_bevdet/src/bevpool_plugin.cu
@@ -179,14 +179,10 @@ int32_t BEVPoolPlugin::enqueue(
   dim3 grid(GET_BLOCKS(n_intervals * channel));
   dim3 block(NUM_THREADS);
 
-  // printf("BEVPool input depth %s\n", dataTypeToString(inputDesc[0].type).c_str());
-  // printf("BEVPool input  feat %s\n", dataTypeToString(inputDesc[1].type).c_str());
-  // printf("BEVPool output feat %s\n", dataTypeToString(outputDesc[0].type).c_str());
-
   switch (int(outputDesc[0].type)) {
     case int(DataType::kFLOAT):
       if (inputDesc[0].type == DataType::kFLOAT) {
-        // printf("bevpool : fp32 fp32\n");
+        // fp32 fp32
         bev_pool_v2_kernel<float, float><<<grid, block, 0, stream>>>(
           channel, n_intervals, map_size, img_size, reinterpret_cast<const float *>(inputs[0]),
           reinterpret_cast<const float *>(inputs[1]), reinterpret_cast<const int *>(inputs[2]),
@@ -194,7 +190,7 @@ int32_t BEVPoolPlugin::enqueue(
           reinterpret_cast<const int *>(inputs[5]), reinterpret_cast<const int *>(inputs[6]),
           reinterpret_cast<float *>(outputs[0]));
       } else {
-        // printf("bevpool : fp16 fp32\n");
+        // fp16 fp32
         bev_pool_v2_kernel<__half, float><<<grid, block, 0, stream>>>(
           channel, n_intervals, map_size, img_size, reinterpret_cast<const __half *>(inputs[0]),
           reinterpret_cast<const __half *>(inputs[1]), reinterpret_cast<const int *>(inputs[2]),
@@ -205,7 +201,7 @@ int32_t BEVPoolPlugin::enqueue(
       break;
     case int(DataType::kHALF):
       if (inputDesc[0].type == DataType::kFLOAT) {
-        // printf("bevpool : fp32 fp16\n");
+        // fp32 fp16
         bev_pool_v2_kernel<float, __half><<<grid, block, 0, stream>>>(
           channel, n_intervals, map_size, img_size, reinterpret_cast<const float *>(inputs[0]),
           reinterpret_cast<const float *>(inputs[1]), reinterpret_cast<const int *>(inputs[2]),
@@ -213,7 +209,7 @@ int32_t BEVPoolPlugin::enqueue(
           reinterpret_cast<const int *>(inputs[5]), reinterpret_cast<const int *>(inputs[6]),
           reinterpret_cast<__half *>(outputs[0]));
       } else {
-        // printf("bevpool : fp16 fp16\n");
+        // fp16 fp16
         bev_pool_v2_kernel<__half, __half><<<grid, block, 0, stream>>>(
           channel, n_intervals, map_size, img_size, reinterpret_cast<const __half *>(inputs[0]),
           reinterpret_cast<const __half *>(inputs[1]), reinterpret_cast<const int *>(inputs[2]),
@@ -223,7 +219,7 @@ int32_t BEVPoolPlugin::enqueue(
       }
       break;
     default:  // should NOT be here
-      printf("\tUnsupport datatype!\n");
+      std::cerr << "\tUnsupported datatype!" << std::endl;
   }
   return 0;
 }
diff --git a/perception/autoware_tensorrt_bevdet/src/cpu_jpegdecoder.cpp b/perception/autoware_tensorrt_bevdet/src/cpu_jpegdecoder.cpp
index 4a24c02b4ca5e..f1eee428589e9 100644
--- a/perception/autoware_tensorrt_bevdet/src/cpu_jpegdecoder.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/cpu_jpegdecoder.cpp
@@ -41,25 +41,23 @@ int decode_jpeg(const std::vector<char> & buffer, uchar * output)
   };
 
   if (setjmp(jerr.setjmp_buffer)) {
-    printf(
-      "\033[31mFailed to decompress jpeg: %s\033[0m\n",
-      jerr.pub.jpeg_message_table[jerr.pub.msg_code]);
+    std::cerr << "\033[31mFailed to decompress jpeg: " << jerr.pub.jpeg_message_table[jerr.pub.msg_code] << "\033[0m" << std::endl;
     jpeg_destroy_decompress(&cinfo);
     return EXIT_FAILURE;
   }
   jpeg_create_decompress(&cinfo);
 
   if (buffer.size() == 0) {
-    printf("buffer size is 0");
+    std::cerr << "buffer size is 0" << std::endl;
     return EXIT_FAILURE;
   }
   jpeg_mem_src(&cinfo, reinterpret_cast<const uchar *>(buffer.data()), buffer.size());
   if (jpeg_read_header(&cinfo, TRUE) != JPEG_HEADER_OK) {
-    printf("\033[31mFailed to read jpeg header\033[0m\n");
+    std::cerr << "\033[31mFailed to read jpeg header\033[0m" << std::endl;
     return EXIT_FAILURE;
   }
   if (jpeg_start_decompress(&cinfo) != TRUE) {
-    printf("\033[31mFailed to start decompress\033[0m\n");
+    std::cerr << "\033[31mFailed to start decompress\033[0m" << std::endl;
     return EXIT_FAILURE;
   }
 
diff --git a/perception/autoware_tensorrt_bevdet/src/gatherbev_plugin.cu b/perception/autoware_tensorrt_bevdet/src/gatherbev_plugin.cu
index 64a5244696b10..638caa808f806 100644
--- a/perception/autoware_tensorrt_bevdet/src/gatherbev_plugin.cu
+++ b/perception/autoware_tensorrt_bevdet/src/gatherbev_plugin.cu
@@ -164,13 +164,10 @@ int32_t GatherBEVPlugin::enqueue(
 
   dim3 grid(GET_BLOCKS(nthreads));
   dim3 block(NUM_THREADS);
-  // printf("GatherBEV input adj_feats %s\n", dataTypeToString(inputDesc[0].type).c_str());
-  // printf("GatherBEV input curr_feat %s\n", dataTypeToString(inputDesc[1].type).c_str());
-  // printf("GatherBEV output bevfeats %s\n", dataTypeToString(outputDesc[0].type).c_str());
 
   switch (int(outputDesc[0].type)) {
     case int(DataType::kFLOAT):
-      // printf("gather : fp32\n");
+      // fp32
       copy_feat_kernel<<<grid, block, 0, stream>>>(
         nthreads, adj_num, channel, map_size, reinterpret_cast<const float *>(inputs[0]),
         reinterpret_cast<const float *>(inputs[1]), reinterpret_cast<const int *>(inputs[2]),
@@ -178,14 +175,14 @@ int32_t GatherBEVPlugin::enqueue(
 
       break;
     case int(DataType::kHALF):
-      // printf("gather : fp16\n");
+      // fp16
       copy_feat_kernel<<<grid, block, 0, stream>>>(
         nthreads, adj_num, channel, map_size, reinterpret_cast<const __half *>(inputs[0]),
         reinterpret_cast<const __half *>(inputs[1]), reinterpret_cast<const int *>(inputs[2]),
         reinterpret_cast<__half *>(outputs[0]));
       break;
     default:  // should NOT be here
-      printf("\tUnsupport datatype!\n");
+      std::cerr << "\tUnsupported datatype!" << std::endl;
   }
   return 0;
 }
diff --git a/perception/autoware_tensorrt_bevdet/src/iou3d_nms.cu b/perception/autoware_tensorrt_bevdet/src/iou3d_nms.cu
index d4302094466b7..0383b6c96f814 100644
--- a/perception/autoware_tensorrt_bevdet/src/iou3d_nms.cu
+++ b/perception/autoware_tensorrt_bevdet/src/iou3d_nms.cu
@@ -131,11 +131,12 @@ __device__ inline float box_overlap(const float * box_a, const float * box_b)
   Point center_b(box_b[0], box_b[1]);
 
 #ifdef DEBUG
-  printf(
-    "a: (%.3f, %.3f, %.3f, %.3f, %.3f), b: (%.3f, %.3f, %.3f, %.3f, %.3f)\n", a_x1, a_y1, a_x2,
-    a_y2, a_angle, b_x1, b_y1, b_x2, b_y2, b_angle);
-  printf(
-    "center a: (%.3f, %.3f), b: (%.3f, %.3f)\n", center_a.x, center_a.y, center_b.x, center_b.y);
+  std::cout << "a: (" << std::fixed << std::setprecision(3) << a_x1 << ", "
+          << a_y1 << ", " << a_x2 << ", " << a_y2 << ", " << a_angle
+          << "), b: (" << b_x1 << ", " << b_y1 << ", " << b_x2 << ", "
+          << b_y2 << ", " << b_angle << ")" << std::endl;
+  std::cout << "center a: (" << center_a.x << ", " << center_a.y
+          << "), b: (" << center_b.x << ", " << center_b.y << ")" << std::endl;
 #endif
 
   Point box_a_corners[5];
@@ -156,16 +157,18 @@ __device__ inline float box_overlap(const float * box_a, const float * box_b)
 
   for (int k = 0; k < 4; k++) {
 #ifdef DEBUG
-    printf(
-      "before corner %d: a(%.3f, %.3f), b(%.3f, %.3f) \n", k, box_a_corners[k].x,
-      box_a_corners[k].y, box_b_corners[k].x, box_b_corners[k].y);
+    std::cout << "before corner " << k << ": a("
+          << std::fixed << std::setprecision(3) << box_a_corners[k].x << ", "
+          << box_a_corners[k].y << "), b("
+          << box_b_corners[k].x << ", " << box_b_corners[k].y << ")" << std::endl;
 #endif
     rotate_around_center(center_a, a_angle_cos, a_angle_sin, box_a_corners[k]);
     rotate_around_center(center_b, b_angle_cos, b_angle_sin, box_b_corners[k]);
 #ifdef DEBUG
-    printf(
-      "corner %d: a(%.3f, %.3f), b(%.3f, %.3f) \n", k, box_a_corners[k].x, box_a_corners[k].y,
-      box_b_corners[k].x, box_b_corners[k].y);
+    std::cout << "corner " << k << ": a("
+          << std::fixed << std::setprecision(3) << box_a_corners[k].x << ", "
+          << box_a_corners[k].y << "), b("
+          << box_b_corners[k].x << ", " << box_b_corners[k].y << ")" << std::endl;
 #endif
   }
 
@@ -187,12 +190,14 @@ __device__ inline float box_overlap(const float * box_a, const float * box_b)
         poly_center = poly_center + cross_points[cnt];
         cnt++;
 #ifdef DEBUG
-        printf(
-          "Cross points (%.3f, %.3f): a(%.3f, %.3f)->(%.3f, %.3f), b(%.3f, "
-          "%.3f)->(%.3f, %.3f) \n",
-          cross_points[cnt - 1].x, cross_points[cnt - 1].y, box_a_corners[i].x, box_a_corners[i].y,
-          box_a_corners[i + 1].x, box_a_corners[i + 1].y, box_b_corners[i].x, box_b_corners[i].y,
-          box_b_corners[i + 1].x, box_b_corners[i + 1].y);
+        std::cout << "Cross points (" 
+          << std::fixed << std::setprecision(3) << cross_points[cnt - 1].x << ", " 
+          << cross_points[cnt - 1].y << "): a("
+          << box_a_corners[i].x << ", " << box_a_corners[i].y << ")->("
+          << box_a_corners[i + 1].x << ", " << box_a_corners[i + 1].y << "), b("
+          << box_b_corners[i].x << ", " << box_b_corners[i].y << ")->("
+          << box_b_corners[i + 1].x << ", " << box_b_corners[i + 1].y << ")" 
+          << std::endl;
 #endif
       }
     }
@@ -205,8 +210,9 @@ __device__ inline float box_overlap(const float * box_a, const float * box_b)
       cross_points[cnt] = box_b_corners[k];
       cnt++;
 #ifdef DEBUG
-      printf(
-        "b corners in a: corner_b(%.3f, %.3f)", cross_points[cnt - 1].x, cross_points[cnt - 1].y);
+      std::cout << "b corners in a: corner_b("
+          << std::fixed << std::setprecision(3) << cross_points[cnt - 1].x << ", "
+          << cross_points[cnt - 1].y << ")" << std::endl;
 #endif
     }
     if (check_in_box2d(box_b, box_a_corners[k])) {
@@ -214,8 +220,9 @@ __device__ inline float box_overlap(const float * box_a, const float * box_b)
       cross_points[cnt] = box_a_corners[k];
       cnt++;
 #ifdef DEBUG
-      printf(
-        "a corners in b: corner_a(%.3f, %.3f)", cross_points[cnt - 1].x, cross_points[cnt - 1].y);
+      std::cout << "a corners in b: corner_a("
+          << std::fixed << std::setprecision(3) << cross_points[cnt - 1].x << ", "
+          << cross_points[cnt - 1].y << ")" << std::endl;
 #endif
     }
   }
@@ -236,9 +243,11 @@ __device__ inline float box_overlap(const float * box_a, const float * box_b)
   }
 
 #ifdef DEBUG
-  printf("cnt=%d\n", cnt);
+  std::cout << "cnt=" << cnt << std::endl;
   for (int i = 0; i < cnt; i++) {
-    printf("All cross point %d: (%.3f, %.3f)\n", i, cross_points[i].x, cross_points[i].y);
+    std::cout << "All cross point " << i << ": ("
+          << std::fixed << std::setprecision(3) << cross_points[i].x << ", "
+          << cross_points[i].y << ")" << std::endl;
   }
 #endif
 
@@ -589,7 +598,6 @@ int Iou3dNmsCuda::DoIou3dNms(
   std::int64_t * host_keep_data)
 {
   const int col_blocks = DIVUP(box_num_pre, THREADS_PER_BLOCK_NMS);  // How many blocks are divided
-  // printf("box_num_pre=%d, col_blocks=%d\n", box_num_pre, col_blocks);
   std::int64_t * dev_mask = NULL;
   cudaMalloc((void **)&dev_mask, box_num_pre * col_blocks * sizeof(std::int64_t));  //
   // THREADS_PER_BLOCK_NMS 掩码的长度
@@ -620,6 +628,8 @@ int Iou3dNmsCuda::DoIou3dNms(
     }
   }
 
-  if (cudaSuccess != cudaGetLastError()) printf("Error!\n");
+  if (cudaSuccess != cudaGetLastError()) {
+    std::cerr << "Error!" << std::endl;
+  }
   return num_to_keep;
 }
diff --git a/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp b/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp
index 9d9fe9b68f051..eab9956f87936 100644
--- a/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp
@@ -237,7 +237,7 @@ int nvjpegDecoder::decode(const std::vector<std::vector<char>> & files_data, uch
   auto decode_end = high_resolution_clock::now();
   duration<double> decode_time = decode_end - decode_start;
 
-  printf("Decode total time : %.4lf ms\n", decode_time.count() * 1000);
+  std::cout << "Decode total time : " << std::fixed << std::setprecision(4) << (decode_time.count() * 1000) << " ms" << std::endl;
 
   for (size_t i = 0; i < files_data.size(); i++) {
     get_img(
diff --git a/perception/autoware_tensorrt_bevdet/src/postprocess.cu b/perception/autoware_tensorrt_bevdet/src/postprocess.cu
index c102425f44a9e..fad4dd194ae1d 100644
--- a/perception/autoware_tensorrt_bevdet/src/postprocess.cu
+++ b/perception/autoware_tensorrt_bevdet/src/postprocess.cu
@@ -117,7 +117,12 @@ PostprocessGPU::PostprocessGPU(
   iou3d_nms.reset(new Iou3dNmsCuda(output_h, output_w, nms_thresh));
 
   for (auto i = 0; i < nms_rescale_factor.size(); i++) {
-    printf("%.2f%c", nms_rescale_factor[i], i == nms_rescale_factor.size() - 1 ? '\n' : ' ');
+    std::cout << std::fixed << std::setprecision(2) << nms_rescale_factor[i];
+        if (i == nms_rescale_factor.size() - 1) {
+            std::cout << std::endl;
+        } else {
+            std::cout << ' ';
+        }
   }
 }
 PostprocessGPU::~PostprocessGPU()
diff --git a/perception/autoware_tensorrt_bevdet/src/preprocess_plugin.cu b/perception/autoware_tensorrt_bevdet/src/preprocess_plugin.cu
index 10dab65a9bef1..b0a1665b5938d 100644
--- a/perception/autoware_tensorrt_bevdet/src/preprocess_plugin.cu
+++ b/perception/autoware_tensorrt_bevdet/src/preprocess_plugin.cu
@@ -183,7 +183,7 @@ int32_t PreprocessPlugin::enqueue(
 
   switch (int(outputDesc[0].type)) {
     case int(DataType::kFLOAT):
-      // printf("pre : float\n");
+      // float
       preprocess_kernel<<<grid, block, 0, stream>>>(
         reinterpret_cast<const uint8_t *>(inputs[0]), reinterpret_cast<float *>(outputs[0]),
         src_row_step, dst_row_step, src_img_step, dst_img_step, src_img_h, src_img_w,
@@ -192,7 +192,7 @@ int32_t PreprocessPlugin::enqueue(
         dst_img_h, dst_img_w, n_img);
       break;
     case int(DataType::kHALF):
-      // printf("pre : half\n");
+      // half
       preprocess_kernel<<<grid, block, 0, stream>>>(
         reinterpret_cast<const uint8_t *>(inputs[0]), reinterpret_cast<__half *>(outputs[0]),
         src_row_step, dst_row_step, src_img_step, dst_img_step, src_img_h, src_img_w,
@@ -202,7 +202,7 @@ int32_t PreprocessPlugin::enqueue(
 
       break;
     default:  // should NOT be here
-      printf("\tUnsupport datatype!\n");
+      std::cerr << "\tUnsupported datatype!" << std::endl;
   }
   return 0;
 }

From 474e7a66103a47aefd4824ab2cfface8b280c258 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 15 Aug 2024 14:55:30 +0000
Subject: [PATCH 39/85] style(pre-commit): autofix

---
 .../autoware/tensorrt_bevdet/common.hpp       |  3 +-
 .../autoware/tensorrt_bevdet/iou3d_nms.hpp    |  2 +-
 .../tensorrt_bevdet/nvjpegdecoder.hpp         |  2 +-
 .../autoware_tensorrt_bevdet/src/bevdet.cpp   |  5 +-
 .../src/cpu_jpegdecoder.cpp                   |  3 +-
 .../autoware_tensorrt_bevdet/src/iou3d_nms.cu | 54 ++++++++-----------
 .../src/nvjpegdecoder.cpp                     |  3 +-
 .../src/postprocess.cu                        | 10 ++--
 8 files changed, 39 insertions(+), 43 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/common.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/common.hpp
index 3b5f6c2646dac..ff9d4ec04fae7 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/common.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/common.hpp
@@ -38,7 +38,8 @@ typedef unsigned char uchar;
 inline void GPUAssert(cudaError_t code, const char * file, int line, bool abort = true)
 {
   if (code != cudaSuccess) {
-    std::cerr << "GPUassert: " << cudaGetErrorString(code) << " " << file << " " << line << std::endl;
+    std::cerr << "GPUassert: " << cudaGetErrorString(code) << " " << file << " " << line
+              << std::endl;
     if (abort) exit(code);
   }
 };
diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/iou3d_nms.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/iou3d_nms.hpp
index 5ff94c25be7e4..1d5f082704ac0 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/iou3d_nms.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/iou3d_nms.hpp
@@ -31,8 +31,8 @@ All Rights Reserved 2019-2022.
 #include <thrust/sort.h>
 #include <thrust/transform.h>
 
-#include <iostream>
 #include <iomanip>
+#include <iostream>
 #include <vector>
 
 const int THREADS_PER_BLOCK = 16;
diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/nvjpegdecoder.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/nvjpegdecoder.hpp
index ef1ade3845143..535a2afc94d0a 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/nvjpegdecoder.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/nvjpegdecoder.hpp
@@ -25,8 +25,8 @@
 
 #include <algorithm>
 #include <fstream>
-#include <iostream>
 #include <iomanip>
+#include <iostream>
 #include <sstream>
 #include <string>
 #include <thread>
diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
index 46808dbcbfa57..bff321f40daac 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
@@ -53,7 +53,8 @@ BEVDet::BEVDet(
     ranks_bev_ptr, ranks_depth_ptr, ranks_feat_ptr, interval_starts_ptr, interval_lengths_ptr);
   auto end = high_resolution_clock::now();
   duration<float> t = end - start;
-  std::cout << std::fixed << std::setprecision(4) << "InitVewTransformer cost time: " << t.count() * 1000 << " ms" << std::endl;
+  std::cout << std::fixed << std::setprecision(4)
+            << "InitVewTransformer cost time: " << t.count() * 1000 << " ms" << std::endl;
 
   if (access(engine_file.c_str(), F_OK) == 0) {
     std::cout << "Inference engine prepared." << std::endl;
@@ -611,7 +612,7 @@ int BEVDet::DoInfer(
   duration<double> post_t = post_end - end;
 
   cost_time = infer_t.count() * 1000;
-  std::cout << std::fixed << std::setprecision(5); // Set precision for all floating-point outputs
+  std::cout << std::fixed << std::setprecision(5);  // Set precision for all floating-point outputs
   std::cout << "TRT-Engine  : " << engine_t.count() * 1000 << " ms" << std::endl;
   std::cout << "Postprocess : " << post_t.count() * 1000 << " ms" << std::endl;
   std::cout << "Inference   : " << infer_t.count() * 1000 << " ms" << std::endl;
diff --git a/perception/autoware_tensorrt_bevdet/src/cpu_jpegdecoder.cpp b/perception/autoware_tensorrt_bevdet/src/cpu_jpegdecoder.cpp
index f1eee428589e9..0289811e220e9 100644
--- a/perception/autoware_tensorrt_bevdet/src/cpu_jpegdecoder.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/cpu_jpegdecoder.cpp
@@ -41,7 +41,8 @@ int decode_jpeg(const std::vector<char> & buffer, uchar * output)
   };
 
   if (setjmp(jerr.setjmp_buffer)) {
-    std::cerr << "\033[31mFailed to decompress jpeg: " << jerr.pub.jpeg_message_table[jerr.pub.msg_code] << "\033[0m" << std::endl;
+    std::cerr << "\033[31mFailed to decompress jpeg: "
+              << jerr.pub.jpeg_message_table[jerr.pub.msg_code] << "\033[0m" << std::endl;
     jpeg_destroy_decompress(&cinfo);
     return EXIT_FAILURE;
   }
diff --git a/perception/autoware_tensorrt_bevdet/src/iou3d_nms.cu b/perception/autoware_tensorrt_bevdet/src/iou3d_nms.cu
index 0383b6c96f814..93a835ae3889d 100644
--- a/perception/autoware_tensorrt_bevdet/src/iou3d_nms.cu
+++ b/perception/autoware_tensorrt_bevdet/src/iou3d_nms.cu
@@ -131,12 +131,11 @@ __device__ inline float box_overlap(const float * box_a, const float * box_b)
   Point center_b(box_b[0], box_b[1]);
 
 #ifdef DEBUG
-  std::cout << "a: (" << std::fixed << std::setprecision(3) << a_x1 << ", "
-          << a_y1 << ", " << a_x2 << ", " << a_y2 << ", " << a_angle
-          << "), b: (" << b_x1 << ", " << b_y1 << ", " << b_x2 << ", "
-          << b_y2 << ", " << b_angle << ")" << std::endl;
-  std::cout << "center a: (" << center_a.x << ", " << center_a.y
-          << "), b: (" << center_b.x << ", " << center_b.y << ")" << std::endl;
+  std::cout << "a: (" << std::fixed << std::setprecision(3) << a_x1 << ", " << a_y1 << ", " << a_x2
+            << ", " << a_y2 << ", " << a_angle << "), b: (" << b_x1 << ", " << b_y1 << ", " << b_x2
+            << ", " << b_y2 << ", " << b_angle << ")" << std::endl;
+  std::cout << "center a: (" << center_a.x << ", " << center_a.y << "), b: (" << center_b.x << ", "
+            << center_b.y << ")" << std::endl;
 #endif
 
   Point box_a_corners[5];
@@ -157,18 +156,16 @@ __device__ inline float box_overlap(const float * box_a, const float * box_b)
 
   for (int k = 0; k < 4; k++) {
 #ifdef DEBUG
-    std::cout << "before corner " << k << ": a("
-          << std::fixed << std::setprecision(3) << box_a_corners[k].x << ", "
-          << box_a_corners[k].y << "), b("
-          << box_b_corners[k].x << ", " << box_b_corners[k].y << ")" << std::endl;
+    std::cout << "before corner " << k << ": a(" << std::fixed << std::setprecision(3)
+              << box_a_corners[k].x << ", " << box_a_corners[k].y << "), b(" << box_b_corners[k].x
+              << ", " << box_b_corners[k].y << ")" << std::endl;
 #endif
     rotate_around_center(center_a, a_angle_cos, a_angle_sin, box_a_corners[k]);
     rotate_around_center(center_b, b_angle_cos, b_angle_sin, box_b_corners[k]);
 #ifdef DEBUG
-    std::cout << "corner " << k << ": a("
-          << std::fixed << std::setprecision(3) << box_a_corners[k].x << ", "
-          << box_a_corners[k].y << "), b("
-          << box_b_corners[k].x << ", " << box_b_corners[k].y << ")" << std::endl;
+    std::cout << "corner " << k << ": a(" << std::fixed << std::setprecision(3)
+              << box_a_corners[k].x << ", " << box_a_corners[k].y << "), b(" << box_b_corners[k].x
+              << ", " << box_b_corners[k].y << ")" << std::endl;
 #endif
   }
 
@@ -190,14 +187,12 @@ __device__ inline float box_overlap(const float * box_a, const float * box_b)
         poly_center = poly_center + cross_points[cnt];
         cnt++;
 #ifdef DEBUG
-        std::cout << "Cross points (" 
-          << std::fixed << std::setprecision(3) << cross_points[cnt - 1].x << ", " 
-          << cross_points[cnt - 1].y << "): a("
-          << box_a_corners[i].x << ", " << box_a_corners[i].y << ")->("
-          << box_a_corners[i + 1].x << ", " << box_a_corners[i + 1].y << "), b("
-          << box_b_corners[i].x << ", " << box_b_corners[i].y << ")->("
-          << box_b_corners[i + 1].x << ", " << box_b_corners[i + 1].y << ")" 
-          << std::endl;
+        std::cout << "Cross points (" << std::fixed << std::setprecision(3)
+                  << cross_points[cnt - 1].x << ", " << cross_points[cnt - 1].y << "): a("
+                  << box_a_corners[i].x << ", " << box_a_corners[i].y << ")->("
+                  << box_a_corners[i + 1].x << ", " << box_a_corners[i + 1].y << "), b("
+                  << box_b_corners[i].x << ", " << box_b_corners[i].y << ")->("
+                  << box_b_corners[i + 1].x << ", " << box_b_corners[i + 1].y << ")" << std::endl;
 #endif
       }
     }
@@ -210,9 +205,8 @@ __device__ inline float box_overlap(const float * box_a, const float * box_b)
       cross_points[cnt] = box_b_corners[k];
       cnt++;
 #ifdef DEBUG
-      std::cout << "b corners in a: corner_b("
-          << std::fixed << std::setprecision(3) << cross_points[cnt - 1].x << ", "
-          << cross_points[cnt - 1].y << ")" << std::endl;
+      std::cout << "b corners in a: corner_b(" << std::fixed << std::setprecision(3)
+                << cross_points[cnt - 1].x << ", " << cross_points[cnt - 1].y << ")" << std::endl;
 #endif
     }
     if (check_in_box2d(box_b, box_a_corners[k])) {
@@ -220,9 +214,8 @@ __device__ inline float box_overlap(const float * box_a, const float * box_b)
       cross_points[cnt] = box_a_corners[k];
       cnt++;
 #ifdef DEBUG
-      std::cout << "a corners in b: corner_a("
-          << std::fixed << std::setprecision(3) << cross_points[cnt - 1].x << ", "
-          << cross_points[cnt - 1].y << ")" << std::endl;
+      std::cout << "a corners in b: corner_a(" << std::fixed << std::setprecision(3)
+                << cross_points[cnt - 1].x << ", " << cross_points[cnt - 1].y << ")" << std::endl;
 #endif
     }
   }
@@ -245,9 +238,8 @@ __device__ inline float box_overlap(const float * box_a, const float * box_b)
 #ifdef DEBUG
   std::cout << "cnt=" << cnt << std::endl;
   for (int i = 0; i < cnt; i++) {
-    std::cout << "All cross point " << i << ": ("
-          << std::fixed << std::setprecision(3) << cross_points[i].x << ", "
-          << cross_points[i].y << ")" << std::endl;
+    std::cout << "All cross point " << i << ": (" << std::fixed << std::setprecision(3)
+              << cross_points[i].x << ", " << cross_points[i].y << ")" << std::endl;
   }
 #endif
 
diff --git a/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp b/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp
index eab9956f87936..45b95801e1684 100644
--- a/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp
@@ -237,7 +237,8 @@ int nvjpegDecoder::decode(const std::vector<std::vector<char>> & files_data, uch
   auto decode_end = high_resolution_clock::now();
   duration<double> decode_time = decode_end - decode_start;
 
-  std::cout << "Decode total time : " << std::fixed << std::setprecision(4) << (decode_time.count() * 1000) << " ms" << std::endl;
+  std::cout << "Decode total time : " << std::fixed << std::setprecision(4)
+            << (decode_time.count() * 1000) << " ms" << std::endl;
 
   for (size_t i = 0; i < files_data.size(); i++) {
     get_img(
diff --git a/perception/autoware_tensorrt_bevdet/src/postprocess.cu b/perception/autoware_tensorrt_bevdet/src/postprocess.cu
index fad4dd194ae1d..f418b2d6aee23 100644
--- a/perception/autoware_tensorrt_bevdet/src/postprocess.cu
+++ b/perception/autoware_tensorrt_bevdet/src/postprocess.cu
@@ -118,11 +118,11 @@ PostprocessGPU::PostprocessGPU(
 
   for (auto i = 0; i < nms_rescale_factor.size(); i++) {
     std::cout << std::fixed << std::setprecision(2) << nms_rescale_factor[i];
-        if (i == nms_rescale_factor.size() - 1) {
-            std::cout << std::endl;
-        } else {
-            std::cout << ' ';
-        }
+    if (i == nms_rescale_factor.size() - 1) {
+      std::cout << std::endl;
+    } else {
+      std::cout << ' ';
+    }
   }
 }
 PostprocessGPU::~PostprocessGPU()

From d425c0ad4a80126fcb6157929de7b81e032251aa Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Mon, 19 Aug 2024 18:22:21 +0800
Subject: [PATCH 40/85] fix: use english comment

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 perception/autoware_tensorrt_bevdet/src/bevdet.cpp           | 2 +-
 perception/autoware_tensorrt_bevdet/src/postprocess.cu       | 4 ++--
 perception/autoware_tensorrt_bevdet/src/preprocess_plugin.cu | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
index bff321f40daac..0c3295f802ea3 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
@@ -496,7 +496,7 @@ void BEVDet::GetAdjBEVFeature(
     flag = 0;
   }
 
-  // idx越小, adj_bevfeat越新
+  // the smaller the idx, the newer th adj_bevfeat
   for (int i = 0; i < adj_num; i++) {
     const void * adj_buffer = adj_frame_ptr->getFrameBuffer(i);
 
diff --git a/perception/autoware_tensorrt_bevdet/src/postprocess.cu b/perception/autoware_tensorrt_bevdet/src/postprocess.cu
index f418b2d6aee23..df8376c37df1a 100644
--- a/perception/autoware_tensorrt_bevdet/src/postprocess.cu
+++ b/perception/autoware_tensorrt_bevdet/src/postprocess.cu
@@ -162,8 +162,8 @@ void PostprocessGPU::DoPostprocess(void ** const bev_buffer, std::vector<Box> &
       score_dev, cls_dev, valid_box_num, nms_rescale_factor_dev);
 
     /*
-    此时 boxes_dev, score_dev, cls_dev 有 valid_box_num 个元素，可能大于nms_pre_maxnum,
-    而且是无序排列的
+    at this point, boxes_dev, score_dev, cls_dev have valid_box_num elements，which may be greater than nms_pre_maxnum,
+    and it's arranged in disorder
     */
     int box_num_pre = 0;
     CHECK_CUDA(cudaMemcpy(&box_num_pre, valid_box_num, sizeof(int), cudaMemcpyDeviceToHost));
diff --git a/perception/autoware_tensorrt_bevdet/src/preprocess_plugin.cu b/perception/autoware_tensorrt_bevdet/src/preprocess_plugin.cu
index b0a1665b5938d..401bc889106fb 100644
--- a/perception/autoware_tensorrt_bevdet/src/preprocess_plugin.cu
+++ b/perception/autoware_tensorrt_bevdet/src/preprocess_plugin.cu
@@ -131,7 +131,7 @@ bool PreprocessPlugin::supportsFormatCombination(
     case 2:  // std
       res = (inOut[2].type == DataType::kFLOAT) && inOut[2].format == TensorFormat::kLINEAR;
       break;
-    case 3:  // 输出 img tensor
+    case 3:  // output img tensor
       res = (inOut[3].type == DataType::kFLOAT || inOut[3].type == DataType::kHALF) &&
             inOut[3].format == inOut[0].format;
 

From 6676d9302f92551315b5a94bcdaa5de312c09b06 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 19 Aug 2024 10:25:01 +0000
Subject: [PATCH 41/85] style(pre-commit): autofix

---
 perception/autoware_tensorrt_bevdet/src/postprocess.cu | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/src/postprocess.cu b/perception/autoware_tensorrt_bevdet/src/postprocess.cu
index df8376c37df1a..1963320fb6fbf 100644
--- a/perception/autoware_tensorrt_bevdet/src/postprocess.cu
+++ b/perception/autoware_tensorrt_bevdet/src/postprocess.cu
@@ -162,8 +162,8 @@ void PostprocessGPU::DoPostprocess(void ** const bev_buffer, std::vector<Box> &
       score_dev, cls_dev, valid_box_num, nms_rescale_factor_dev);
 
     /*
-    at this point, boxes_dev, score_dev, cls_dev have valid_box_num elements，which may be greater than nms_pre_maxnum,
-    and it's arranged in disorder
+    at this point, boxes_dev, score_dev, cls_dev have valid_box_num elements，which may be greater
+    than nms_pre_maxnum, and it's arranged in disorder
     */
     int box_num_pre = 0;
     CHECK_CUDA(cudaMemcpy(&box_num_pre, valid_box_num, sizeof(int), cudaMemcpyDeviceToHost));

From 30d208c1355b94d4fd7c4e923dc371f466193acf Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Tue, 20 Aug 2024 15:25:47 +0800
Subject: [PATCH 42/85] fix: replace cpp std log with ros2 log

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 .../autoware/tensorrt_bevdet/common.hpp       |  22 ++-
 .../include/autoware/tensorrt_bevdet/data.hpp |   5 -
 .../autoware/tensorrt_bevdet/iou3d_nms.hpp    |   1 -
 .../tensorrt_bevdet/nvjpegdecoder.hpp         |  15 +-
 .../src/alignbev_plugin.cu                    |   5 +-
 .../autoware_tensorrt_bevdet/src/bevdet.cpp   | 160 +++++++++++++-----
 .../src/bevpool_plugin.cu                     |   5 +-
 .../src/cpu_jpegdecoder.cpp                   |  22 ++-
 .../autoware_tensorrt_bevdet/src/data.cpp     |  42 +----
 .../src/gatherbev_plugin.cu                   |   5 +-
 .../autoware_tensorrt_bevdet/src/iou3d_nms.cu |  86 +++++++---
 .../src/nvjpegdecoder.cpp                     |  12 +-
 .../src/postprocess.cu                        |  13 +-
 .../src/preprocess_plugin.cu                  |   5 +-
 14 files changed, 257 insertions(+), 141 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/common.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/common.hpp
index ff9d4ec04fae7..4eaead125055a 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/common.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/common.hpp
@@ -23,6 +23,8 @@
 
 #include <iostream>
 #include <string>
+#include "rclcpp/logger.hpp"
+#include "rclcpp/rclcpp.hpp"
 
 typedef unsigned char uchar;
 
@@ -38,8 +40,13 @@ typedef unsigned char uchar;
 inline void GPUAssert(cudaError_t code, const char * file, int line, bool abort = true)
 {
   if (code != cudaSuccess) {
-    std::cerr << "GPUassert: " << cudaGetErrorString(code) << " " << file << " " << line
-              << std::endl;
+    RCLCPP_ERROR(
+      rclcpp::get_logger("GPUAssert"), 
+      "GPUassert: %s %s %d",                     
+      cudaGetErrorString(code),                  
+      file,                                      
+      line                                       
+    );
     if (abort) exit(code);
   }
 };
@@ -122,22 +129,21 @@ class Logger : public nvinfer1::ILogger
     if (severity > reportable_severity) return;
     switch (severity) {
       case Severity::kINTERNAL_ERROR:
-        std::cerr << "INTERNAL_ERROR: ";
+        RCLCPP_ERROR(rclcpp::get_logger("autoware_tensorrt_bevdet"), "INTERNAL_ERROR: %s", msg);
         break;
       case Severity::kERROR:
-        std::cerr << "ERROR: ";
+        RCLCPP_ERROR(rclcpp::get_logger("autoware_tensorrt_bevdet"), "ERROR: %s", msg);
         break;
       case Severity::kWARNING:
-        std::cerr << "WARNING: ";
+        RCLCPP_WARN(rclcpp::get_logger("autoware_tensorrt_bevdet"), "WARNING: %s", msg);
         break;
       case Severity::kINFO:
-        std::cerr << "INFO: ";
+        RCLCPP_INFO(rclcpp::get_logger("autoware_tensorrt_bevdet"), "INFO: %s", msg);
         break;
       default:
-        std::cerr << "UNKNOWN: ";
+        RCLCPP_INFO(rclcpp::get_logger("autoware_tensorrt_bevdet"), "UNKNOWN: %s", msg);
         break;
     }
-    std::cerr << msg << std::endl;
   }
 
   Severity reportable_severity;
diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/data.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/data.hpp
index 2afc30bd43f70..bfd75f1ebe191 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/data.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/data.hpp
@@ -60,9 +60,4 @@ Eigen::Translation3f fromYamlTrans(YAML::Node x);
 Eigen::Quaternion<float> fromYamlQuater(YAML::Node x);
 Eigen::Matrix3f fromYamlMatrix3f(YAML::Node x);
 
-int read_image(const std::string & image_names, std::vector<char> & raw_data);
-
-int read_sample(
-  const std::vector<std::string> & imgs_file, std::vector<std::vector<char>> & imgs_data);
-
 #endif  // AUTOWARE__TENSORRT_BEVDET__DATA_HPP_
diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/iou3d_nms.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/iou3d_nms.hpp
index 1d5f082704ac0..5a95be4465317 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/iou3d_nms.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/iou3d_nms.hpp
@@ -31,7 +31,6 @@ All Rights Reserved 2019-2022.
 #include <thrust/sort.h>
 #include <thrust/transform.h>
 
-#include <iomanip>
 #include <iostream>
 #include <vector>
 
diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/nvjpegdecoder.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/nvjpegdecoder.hpp
index 535a2afc94d0a..450ac3edfe6bd 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/nvjpegdecoder.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/nvjpegdecoder.hpp
@@ -25,7 +25,6 @@
 
 #include <algorithm>
 #include <fstream>
-#include <iomanip>
 #include <iostream>
 #include <sstream>
 #include <string>
@@ -46,8 +45,13 @@
 inline void NvJpegAssert(nvjpegStatus_t code, const char * file, int line)
 {
   if (code != NVJPEG_STATUS_SUCCESS) {
-    std::cerr << "NVJPEG failure: '#" << code << "' at " << std::string(file) << ":" << line
-              << std::endl;
+    RCLCPP_ERROR(
+      rclcpp::get_logger("autoware_tensorrt_bevdet"), 
+      "NVJPEG failure: '#%d' at %s:%d", 
+      code,
+      file, 
+      line 
+    ); 
     exit(1);
   }
 }
@@ -78,7 +82,10 @@ struct share_params
       _fmt == DECODE_UNCHANGED) {
       fmt = (nvjpegOutputFormat_t)_fmt;
     } else {
-      std::cerr << "Unknown format! Auto switch BGR!" << std::endl;
+      RCLCPP_WARN(
+        rclcpp::get_logger("autoware_tensorrt_bevdet"), 
+        "Unknown format! Auto switch BGR!" 
+      );
       fmt = NVJPEG_OUTPUT_BGR;
     }
   }
diff --git a/perception/autoware_tensorrt_bevdet/src/alignbev_plugin.cu b/perception/autoware_tensorrt_bevdet/src/alignbev_plugin.cu
index 3218aa72befd8..a23a30fd29d04 100644
--- a/perception/autoware_tensorrt_bevdet/src/alignbev_plugin.cu
+++ b/perception/autoware_tensorrt_bevdet/src/alignbev_plugin.cu
@@ -217,7 +217,10 @@ int32_t AlignBEVPlugin::enqueue(
       }
       break;
     default:  // should NOT be here
-      std::cerr << "\tUnsupported datatype!" << std::endl;
+      RCLCPP_ERROR(
+        rclcpp::get_logger("AlignBEVPlugin"), 
+        "\tUnsupported datatype!"
+      );
   }
 
   return 0;
diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
index 0c3295f802ea3..d9e70666cbaa5 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
@@ -38,8 +38,12 @@ BEVDet::BEVDet(
 {
   InitParams(config_file);
   if (n_img != N_img) {
-    std::cerr << "BEVDet needs " << N_img << " images, but " << n_img << " images were given!"
-              << std::endl;
+    RCLCPP_ERROR(
+      rclcpp::get_logger("BEVDet"), 
+      "BEVDet need %d images, but given %d images!", 
+      N_img, 
+      n_img 
+    );
   }
   auto start = high_resolution_clock::now();
 
@@ -53,15 +57,24 @@ BEVDet::BEVDet(
     ranks_bev_ptr, ranks_depth_ptr, ranks_feat_ptr, interval_starts_ptr, interval_lengths_ptr);
   auto end = high_resolution_clock::now();
   duration<float> t = end - start;
-  std::cout << std::fixed << std::setprecision(4)
-            << "InitVewTransformer cost time: " << t.count() * 1000 << " ms" << std::endl;
+  RCLCPP_INFO(
+    rclcpp::get_logger("BEVDet"), 
+    "InitVewTransformer cost time : %.4lf ms",
+    t.count() * 1000 
+  );
 
   if (access(engine_file.c_str(), F_OK) == 0) {
-    std::cout << "Inference engine prepared." << std::endl;
+    RCLCPP_INFO(
+      rclcpp::get_logger("BEVDet"), 
+      "Inference engine prepared."
+    );
   } else {
     // onnx to engine
-    std::cerr << "Could not find " << engine_file << ", trying to make TensorRT engine from ONNX."
-              << std::endl;
+    RCLCPP_WARN(
+      rclcpp::get_logger("BEVDet"), 
+      "Could not find %s, try making TensorRT engine from onnx", 
+      engine_file.c_str() 
+    );
     ExportEngine(onnx_file, engine_file);
   }
   InitEngine(engine_file);  // FIXME
@@ -240,7 +253,11 @@ void BEVDet::MallocDeviceMemory()
     CHECK_CUDA(cudaMalloc(&trt_buffer_dev[i], size));
   }
 
-  std::cout << "img num binding : " << trt_engine->getNbBindings() << std::endl;
+  RCLCPP_INFO(
+    rclcpp::get_logger("BEVDet"), 
+    "img num binding : %d", 
+    trt_engine->getNbBindings() 
+  );
 
   post_buffer = reinterpret_cast<void **>(new void *[class_num_pre_task.size() * 6]);
   for (size_t i = 0; i < class_num_pre_task.size(); i++) {
@@ -378,20 +395,30 @@ void BEVDet::InitViewTransformer(
   interval_starts_ptr.reset(interval_starts_host_ptr);
   interval_lengths_ptr.reset(interval_lengths_host_ptr);
 
-  std::cout << "valid_feat_num: " << valid_feat_num << std::endl;
-  std::cout << "unique_bev_num: " << unique_bev_num << std::endl;
+  RCLCPP_INFO(
+    rclcpp::get_logger("BEVDet"), 
+    "valid_feat_num: %d", 
+     valid_feat_num
+  );
+  RCLCPP_INFO(
+    rclcpp::get_logger("BEVDet"), 
+    "unique_bev_num: %d", 
+    unique_bev_num
+  );
 }
 
-void print_dim(nvinfer1::Dims dim)
+void print_dim(nvinfer1::Dims dim, std::string name)
 {
+  std::ostringstream oss;
+  oss << name << " : ";
   for (auto i = 0; i < dim.nbDims; i++) {
-    std::cout << dim.d[i];
-    if (i == dim.nbDims - 1) {
-      std::cout << std::endl;
-    } else {
-      std::cout << " ";
-    }
+    oss << dim.d[i] << ' ';
   }
+  RCLCPP_INFO(
+    rclcpp::get_logger("BEVDet"), 
+    "%s", 
+    oss.str().c_str() 
+  );
 }
 
 int BEVDet::InitEngine(const std::string & engine_file)
@@ -401,13 +428,19 @@ int BEVDet::InitEngine(const std::string & engine_file)
   }
 
   if (trt_engine == nullptr) {
-    std::cerr << "Failed to deserialize engine file!" << std::endl;
+    RCLCPP_ERROR(
+      rclcpp::get_logger("BEVDet"), 
+      "Failed to deserialize engine file!"
+    );
     return EXIT_FAILURE;
   }
   trt_context = trt_engine->createExecutionContext();
 
   if (trt_context == nullptr) {
-    std::cerr << "Failed to create TensorRT Execution Context!" << std::endl;
+    RCLCPP_ERROR(
+      rclcpp::get_logger("BEVDet"), 
+      "Failed to create TensorRT Execution Context!"
+    );
     return EXIT_FAILURE;
   }
 
@@ -435,10 +468,8 @@ int BEVDet::InitEngine(const std::string & engine_file)
     auto dim = trt_context->getBindingDimensions(i);
     auto name = trt_engine->getBindingName(i);
     buffer_map[name] = i;
-    std::cout << name << " : ";
-    print_dim(dim);
+    print_dim(dim, name);
   }
-  std::cout << std::endl;
 
   return EXIT_SUCCESS;
 }
@@ -476,11 +507,19 @@ int BEVDet::DeserializeTRTEngine(
   *engine_ptr = engine;
   for (int bi = 0; bi < engine->getNbBindings(); bi++) {
     if (engine->bindingIsInput(bi) == true) {
-      std::cout << "Binding " << bi << " (" << engine->getBindingName(bi) << "): Input."
-                << std::endl;
+      RCLCPP_INFO(
+        rclcpp::get_logger("BEVDet"), 
+        "Binding %d (%s): Input.", 
+        bi, 
+        engine->getBindingName(bi) 
+      );
     } else {
-      std::cout << "Binding " << bi << " (" << engine->getBindingName(bi) << "): Output."
-                << std::endl;
+      RCLCPP_INFO(
+        rclcpp::get_logger("BEVDet"), 
+        "Binding %d (%s): Output.", 
+        bi, 
+        engine->getBindingName(bi) 
+      );
     }
   }
   return EXIT_SUCCESS;
@@ -579,7 +618,7 @@ void BEVDet::GetCurr2AdjTransform(
 int BEVDet::DoInfer(
   const camsData & cam_data, std::vector<Box> & out_detections, float & cost_time, int idx)
 {
-  std::cout << "-------------------" << (idx + 1) << "-------------------" << std::endl;
+  printf("-------------------%d-------------------\n", idx + 1);
 
   auto start = high_resolution_clock::now();
   CHECK_CUDA(cudaMemcpy(
@@ -593,7 +632,10 @@ int BEVDet::DoInfer(
     cam_data.param.scene_token, cam_data.param.ego2global_rot, cam_data.param.ego2global_trans);
 
   if (!trt_context->executeV2(trt_buffer_dev)) {
-    std::cerr << "BEVDet forward failing!" << std::endl;
+    RCLCPP_ERROR(
+      rclcpp::get_logger("BEVDet"), 
+      "BEVDet forward failing!" 
+    );
   }
 
   adj_frame_ptr->saveFrameBuffer(
@@ -612,12 +654,11 @@ int BEVDet::DoInfer(
   duration<double> post_t = post_end - end;
 
   cost_time = infer_t.count() * 1000;
-  std::cout << std::fixed << std::setprecision(5);  // Set precision for all floating-point outputs
-  std::cout << "TRT-Engine  : " << engine_t.count() * 1000 << " ms" << std::endl;
-  std::cout << "Postprocess : " << post_t.count() * 1000 << " ms" << std::endl;
-  std::cout << "Inference   : " << infer_t.count() * 1000 << " ms" << std::endl;
+  printf("TRT-Engine  : %.5lf ms\n", engine_t.count() * 1000);
+  printf("Postprocess : %.5lf ms\n", post_t.count() * 1000);
+  printf("Inference   : %.5lf ms\n", infer_t.count() * 1000);
 
-  std::cout << "Detect " << out_detections.size() << " objects" << std::endl;
+  printf("Detect %ld objects\n", out_detections.size());
 
   return EXIT_SUCCESS;
 }
@@ -652,16 +693,26 @@ void BEVDet::ExportEngine(const std::string & onnxFile, const std::string & trtF
   nvonnxparser::IParser * parser = nvonnxparser::createParser(*network, g_logger);
 
   if (!parser->parseFromFile(onnxFile.c_str(), static_cast<int>(g_logger.reportable_severity))) {
-    std::cout << std::string("Failed parsing .onnx file!") << std::endl;
+    RCLCPP_ERROR(
+      rclcpp::get_logger("BEVDet"), 
+      "Failed parsing .onnx file!" 
+    );
     for (int i = 0; i < parser->getNbErrors(); ++i) {
       auto * error = parser->getError(i);
-      std::cout << std::to_string(static_cast<int>(error->code())) << std::string(":")
-                << std::string(error->desc()) << std::endl;
+      RCLCPP_ERROR(
+        rclcpp::get_logger("BEVDet"),
+        "Error code: %d, Description: %s",
+        static_cast<int>(error->code()),
+        error->desc()
+      );
     }
 
     return;
   }
-  std::cout << std::string("Succeeded parsing .onnx file!") << std::endl;
+  RCLCPP_INFO(
+    rclcpp::get_logger("BEVDet"), 
+    "Succeeded parsing .onnx file!" 
+  );
 
   for (size_t i = 0; i < min_shapes.size(); i++) {
     nvinfer1::ITensor * it = network->getInput(i);
@@ -673,30 +724,51 @@ void BEVDet::ExportEngine(const std::string & onnxFile, const std::string & trtF
 
   nvinfer1::IHostMemory * engineString = builder->buildSerializedNetwork(*network, *config);
   if (engineString == nullptr || engineString->size() == 0) {
-    std::cout << "Failed building serialized engine!" << std::endl;
+    RCLCPP_ERROR(
+      rclcpp::get_logger("BEVDet"), 
+      "Failed building serialized engine!" 
+    );
     return;
   }
-  std::cout << "Succeeded building serialized engine!" << std::endl;
+  RCLCPP_INFO(
+    rclcpp::get_logger("BEVDet"), 
+    "Succeeded building serialized engine!" 
+  );
 
   nvinfer1::IRuntime * runtime{nvinfer1::createInferRuntime(g_logger)};
   engine = runtime->deserializeCudaEngine(engineString->data(), engineString->size());
   if (engine == nullptr) {
-    std::cout << "Failed building engine!" << std::endl;
+    RCLCPP_ERROR(
+      rclcpp::get_logger("BEVDet"), 
+      "Failed building engine!" 
+    );
     return;
   }
-  std::cout << "Succeeded building engine!" << std::endl;
+  RCLCPP_INFO(
+    rclcpp::get_logger("BEVDet"), 
+    "Succeeded building engine!" 
+  );
 
   std::ofstream engineFile(trtFile, std::ios::binary);
   if (!engineFile) {
-    std::cout << "Failed opening file to write" << std::endl;
+    RCLCPP_ERROR(
+      rclcpp::get_logger("BEVDet"), 
+      "Failed opening file to write" 
+    );
     return;
   }
   engineFile.write(static_cast<char *>(engineString->data()), engineString->size());
   if (engineFile.fail()) {
-    std::cout << "Failed saving .engine file!" << std::endl;
+    RCLCPP_ERROR(
+      rclcpp::get_logger("BEVDet"), 
+      "Failed saving .engine file!" 
+    );
     return;
   }
-  std::cout << "Succeeded saving .engine file!" << std::endl;
+  RCLCPP_INFO(
+    rclcpp::get_logger("BEVDet"), 
+    "Succeeded saving .engine file!" 
+  );
 }
 
 BEVDet::~BEVDet()
diff --git a/perception/autoware_tensorrt_bevdet/src/bevpool_plugin.cu b/perception/autoware_tensorrt_bevdet/src/bevpool_plugin.cu
index ac8b0c917b92a..ca11b679f67db 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevpool_plugin.cu
+++ b/perception/autoware_tensorrt_bevdet/src/bevpool_plugin.cu
@@ -219,7 +219,10 @@ int32_t BEVPoolPlugin::enqueue(
       }
       break;
     default:  // should NOT be here
-      std::cerr << "\tUnsupported datatype!" << std::endl;
+      RCLCPP_ERROR(
+        rclcpp::get_logger("BEVPoolPlugin"), 
+        "\tUnsupport datatype!"
+      );
   }
   return 0;
 }
diff --git a/perception/autoware_tensorrt_bevdet/src/cpu_jpegdecoder.cpp b/perception/autoware_tensorrt_bevdet/src/cpu_jpegdecoder.cpp
index 0289811e220e9..295a00c392b81 100644
--- a/perception/autoware_tensorrt_bevdet/src/cpu_jpegdecoder.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/cpu_jpegdecoder.cpp
@@ -41,24 +41,36 @@ int decode_jpeg(const std::vector<char> & buffer, uchar * output)
   };
 
   if (setjmp(jerr.setjmp_buffer)) {
-    std::cerr << "\033[31mFailed to decompress jpeg: "
-              << jerr.pub.jpeg_message_table[jerr.pub.msg_code] << "\033[0m" << std::endl;
+    RCLCPP_ERROR(
+      rclcpp::get_logger("decode_jpeg"), 
+      "Failed to decompress jpeg: %s", 
+      jerr.pub.jpeg_message_table[jerr.pub.msg_code] 
+    );
     jpeg_destroy_decompress(&cinfo);
     return EXIT_FAILURE;
   }
   jpeg_create_decompress(&cinfo);
 
   if (buffer.size() == 0) {
-    std::cerr << "buffer size is 0" << std::endl;
+    RCLCPP_ERROR(
+      rclcpp::get_logger("decode_jpeg"),
+      "buffer size is 0"
+    );
     return EXIT_FAILURE;
   }
   jpeg_mem_src(&cinfo, reinterpret_cast<const uchar *>(buffer.data()), buffer.size());
   if (jpeg_read_header(&cinfo, TRUE) != JPEG_HEADER_OK) {
-    std::cerr << "\033[31mFailed to read jpeg header\033[0m" << std::endl;
+    RCLCPP_ERROR(
+      rclcpp::get_logger("decode_jpeg"), 
+      "Failed to read jpeg header" 
+    );
     return EXIT_FAILURE;
   }
   if (jpeg_start_decompress(&cinfo) != TRUE) {
-    std::cerr << "\033[31mFailed to start decompress\033[0m" << std::endl;
+    RCLCPP_ERROR(
+      rclcpp::get_logger("decode_jpeg"), 
+      "Failed to start decompress" 
+    );
     return EXIT_FAILURE;
   }
 
diff --git a/perception/autoware_tensorrt_bevdet/src/data.cpp b/perception/autoware_tensorrt_bevdet/src/data.cpp
index 3b7783c4b8dc0..0cca4263dbd60 100644
--- a/perception/autoware_tensorrt_bevdet/src/data.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/data.cpp
@@ -29,8 +29,12 @@ camParams::camParams(const YAML::Node & config, int n, std::vector<std::string>
 : N_img(n)
 {
   if (static_cast<size_t>(n) != cams_name.size()) {
-    std::cerr << "Error! Need " << n << " camera param, bug given " << cams_name.size()
-              << " camera names!" << std::endl;
+    RCLCPP_ERROR(
+      rclcpp::get_logger("camParams"), 
+      "Error! Need %d camera param, but given %zu camera names!", 
+      n,
+      cams_name.size()
+    );
   }
   ego2global_rot = fromYamlQuater(config["ego2global_rotation"]);
   ego2global_trans = fromYamlTrans(config["ego2global_translation"]);
@@ -58,40 +62,6 @@ camParams::camParams(const YAML::Node & config, int n, std::vector<std::string>
   }
 }
 
-int read_image(const std::string & image_names, std::vector<char> & raw_data)
-{
-  std::ifstream input(image_names.c_str(), std::ios::in | std::ios::binary | std::ios::ate);
-
-  if (!(input.is_open())) {
-    std::cerr << "Cannot open image: " << image_names << std::endl;
-    return EXIT_FAILURE;
-  }
-
-  std::streamsize file_size = input.tellg();
-  input.seekg(0, std::ios::beg);
-  if (raw_data.size() < static_cast<size_t>(file_size)) {
-    raw_data.resize(file_size);
-  }
-  if (!input.read(raw_data.data(), file_size)) {
-    std::cerr << "Cannot read from file: " << image_names << std::endl;
-    return EXIT_FAILURE;
-  }
-  return EXIT_SUCCESS;
-}
-
-int read_sample(
-  const std::vector<std::string> & imgs_file, std::vector<std::vector<char>> & imgs_data)
-{
-  imgs_data.resize(imgs_file.size());
-
-  for (size_t i = 0; i < imgs_data.size(); i++) {
-    if (read_image(imgs_file[i], imgs_data[i])) {
-      return EXIT_FAILURE;
-    }
-  }
-  return EXIT_SUCCESS;
-}
-
 Eigen::Translation3f fromYamlTrans(YAML::Node x)
 {
   std::vector<float> trans = x.as<std::vector<float>>();
diff --git a/perception/autoware_tensorrt_bevdet/src/gatherbev_plugin.cu b/perception/autoware_tensorrt_bevdet/src/gatherbev_plugin.cu
index 638caa808f806..b62fe057c415f 100644
--- a/perception/autoware_tensorrt_bevdet/src/gatherbev_plugin.cu
+++ b/perception/autoware_tensorrt_bevdet/src/gatherbev_plugin.cu
@@ -182,7 +182,10 @@ int32_t GatherBEVPlugin::enqueue(
         reinterpret_cast<__half *>(outputs[0]));
       break;
     default:  // should NOT be here
-      std::cerr << "\tUnsupported datatype!" << std::endl;
+      RCLCPP_ERROR(
+        rclcpp::get_logger("GatherBEVPlugin"), 
+        "\tUnsupport datatype!" 
+      );
   }
   return 0;
 }
diff --git a/perception/autoware_tensorrt_bevdet/src/iou3d_nms.cu b/perception/autoware_tensorrt_bevdet/src/iou3d_nms.cu
index 93a835ae3889d..314eff52681a9 100644
--- a/perception/autoware_tensorrt_bevdet/src/iou3d_nms.cu
+++ b/perception/autoware_tensorrt_bevdet/src/iou3d_nms.cu
@@ -131,11 +131,18 @@ __device__ inline float box_overlap(const float * box_a, const float * box_b)
   Point center_b(box_b[0], box_b[1]);
 
 #ifdef DEBUG
-  std::cout << "a: (" << std::fixed << std::setprecision(3) << a_x1 << ", " << a_y1 << ", " << a_x2
-            << ", " << a_y2 << ", " << a_angle << "), b: (" << b_x1 << ", " << b_y1 << ", " << b_x2
-            << ", " << b_y2 << ", " << b_angle << ")" << std::endl;
-  std::cout << "center a: (" << center_a.x << ", " << center_a.y << "), b: (" << center_b.x << ", "
-            << center_b.y << ")" << std::endl;
+  RCLCPP_DEBUG(
+    rclcpp::get_logger("iou3d_nms"),
+    "a: (%.3f, %.3f, %.3f, %.3f, %.3f), b: (%.3f, %.3f, %.3f, %.3f, %.3f)",
+    a_x1, a_y1, a_x2, a_y2, a_angle, 
+    b_x1, b_y1, b_x2, b_y2, b_angle 
+  );
+  RCLCPP_DEBUG(
+    rclcpp::get_logger("iou3d_nms"),
+    "center a: (%.3f, %.3f), b: (%.3f, %.3f)",
+    center_a.x, center_a.y, 
+    center_b.x, center_b.y 
+  );
 #endif
 
   Point box_a_corners[5];
@@ -156,16 +163,24 @@ __device__ inline float box_overlap(const float * box_a, const float * box_b)
 
   for (int k = 0; k < 4; k++) {
 #ifdef DEBUG
-    std::cout << "before corner " << k << ": a(" << std::fixed << std::setprecision(3)
-              << box_a_corners[k].x << ", " << box_a_corners[k].y << "), b(" << box_b_corners[k].x
-              << ", " << box_b_corners[k].y << ")" << std::endl;
+    RCLCPP_DEBUG(
+      rclcpp::get_logger("iou3d_nms"), 
+      "before corner %d: a(%.3f, %.3f), b(%.3f, %.3f)",
+      k,                           
+      box_a_corners[k].x, box_a_corners[k].y,  
+      box_b_corners[k].x, box_b_corners[k].y  
+    );
 #endif
     rotate_around_center(center_a, a_angle_cos, a_angle_sin, box_a_corners[k]);
     rotate_around_center(center_b, b_angle_cos, b_angle_sin, box_b_corners[k]);
 #ifdef DEBUG
-    std::cout << "corner " << k << ": a(" << std::fixed << std::setprecision(3)
-              << box_a_corners[k].x << ", " << box_a_corners[k].y << "), b(" << box_b_corners[k].x
-              << ", " << box_b_corners[k].y << ")" << std::endl;
+    RCLCPP_DEBUG(
+      rclcpp::get_logger("iou3d_nms"),
+      "corner %d: a(%.3f, %.3f), b(%.3f, %.3f)",
+      k,                           
+      box_a_corners[k].x, box_a_corners[k].y,  
+      box_b_corners[k].x, box_b_corners[k].y  
+    );
 #endif
   }
 
@@ -187,12 +202,15 @@ __device__ inline float box_overlap(const float * box_a, const float * box_b)
         poly_center = poly_center + cross_points[cnt];
         cnt++;
 #ifdef DEBUG
-        std::cout << "Cross points (" << std::fixed << std::setprecision(3)
-                  << cross_points[cnt - 1].x << ", " << cross_points[cnt - 1].y << "): a("
-                  << box_a_corners[i].x << ", " << box_a_corners[i].y << ")->("
-                  << box_a_corners[i + 1].x << ", " << box_a_corners[i + 1].y << "), b("
-                  << box_b_corners[i].x << ", " << box_b_corners[i].y << ")->("
-                  << box_b_corners[i + 1].x << ", " << box_b_corners[i + 1].y << ")" << std::endl;
+        RCLCPP_DEBUG(
+          rclcpp::get_logger("iou3d_nms"), 
+          "Cross points (%.3f, %.3f): a(%.3f, %.3f)->(%.3f, %.3f), b(%.3f, %.3f)->(%.3f, %.3f)", 
+          cross_points[cnt - 1].x, cross_points[cnt - 1].y,  
+          box_a_corners[i].x, box_a_corners[i].y,           
+          box_a_corners[i + 1].x, box_a_corners[i + 1].y,   
+          box_b_corners[i].x, box_b_corners[i].y,          
+          box_b_corners[i + 1].x, box_b_corners[i + 1].y 
+        );
 #endif
       }
     }
@@ -205,8 +223,11 @@ __device__ inline float box_overlap(const float * box_a, const float * box_b)
       cross_points[cnt] = box_b_corners[k];
       cnt++;
 #ifdef DEBUG
-      std::cout << "b corners in a: corner_b(" << std::fixed << std::setprecision(3)
-                << cross_points[cnt - 1].x << ", " << cross_points[cnt - 1].y << ")" << std::endl;
+      RCLCPP_DEBUG(
+        rclcpp::get_logger("iou3d_nms"), 
+        "b corners in a: corner_b(%.3f, %.3f)", 
+        cross_points[cnt - 1].x, cross_points[cnt - 1].y 
+      );
 #endif
     }
     if (check_in_box2d(box_b, box_a_corners[k])) {
@@ -214,8 +235,11 @@ __device__ inline float box_overlap(const float * box_a, const float * box_b)
       cross_points[cnt] = box_a_corners[k];
       cnt++;
 #ifdef DEBUG
-      std::cout << "a corners in b: corner_a(" << std::fixed << std::setprecision(3)
-                << cross_points[cnt - 1].x << ", " << cross_points[cnt - 1].y << ")" << std::endl;
+      RCLCPP_DEBUG(
+        rclcpp::get_logger("iou3d_nms"), 
+        "a corners in b: corner_a(%.3f, %.3f)", 
+        cross_points[cnt - 1].x, cross_points[cnt - 1].y 
+      );
 #endif
     }
   }
@@ -236,10 +260,18 @@ __device__ inline float box_overlap(const float * box_a, const float * box_b)
   }
 
 #ifdef DEBUG
-  std::cout << "cnt=" << cnt << std::endl;
+  RCLCPP_DEBUG(
+    rclcpp::get_logger("iou3d_nms"), 
+    "cnt=%d", 
+    cnt 
+  );
   for (int i = 0; i < cnt; i++) {
-    std::cout << "All cross point " << i << ": (" << std::fixed << std::setprecision(3)
-              << cross_points[i].x << ", " << cross_points[i].y << ")" << std::endl;
+    RCLCPP_DEBUG(
+      rclcpp::get_logger("iou3d_nms"), 
+      "All cross point %d: (%.3f, %.3f)", 
+      i,                             
+      cross_points[i].x, cross_points[i].y 
+    );
   }
 #endif
 
@@ -621,7 +653,11 @@ int Iou3dNmsCuda::DoIou3dNms(
   }
 
   if (cudaSuccess != cudaGetLastError()) {
-    std::cerr << "Error!" << std::endl;
+    RCLCPP_ERROR(
+        rclcpp::get_logger("iou3d_nms"), 
+        "Error: %s", 
+        cudaGetErrorString(cudaGetLastError()) 
+    );
   }
   return num_to_keep;
 }
diff --git a/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp b/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp
index 45b95801e1684..ed467db2b8a74 100644
--- a/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp
@@ -237,8 +237,11 @@ int nvjpegDecoder::decode(const std::vector<std::vector<char>> & files_data, uch
   auto decode_end = high_resolution_clock::now();
   duration<double> decode_time = decode_end - decode_start;
 
-  std::cout << "Decode total time : " << std::fixed << std::setprecision(4)
-            << (decode_time.count() * 1000) << " ms" << std::endl;
+  RCLCPP_INFO(
+    rclcpp::get_logger("nvjpegDecoder"), 
+    "Decode total time : %.4lf ms",
+    decode_time.count() * 1000
+  );
 
   for (size_t i = 0; i < files_data.size(); i++) {
     get_img(
@@ -263,7 +266,10 @@ int nvjpegDecoder::init()
     &share_param.nvjpeg_handle);
   share_param.hw_decode_available = true;
   if (status == NVJPEG_STATUS_ARCH_MISMATCH) {
-    std::cout << "Hardware Decoder not supported. Falling back to default backend" << std::endl;
+    RCLCPP_WARN(
+      rclcpp::get_logger("nvjpegDecoder"), 
+      "Hardware Decoder not supported. Falling back to default backend"
+    );
     CHECK_NVJPEG(nvjpegCreateEx(
       NVJPEG_BACKEND_DEFAULT, &dev_allocator, &pinned_allocator, NVJPEG_FLAGS_DEFAULT,
       &share_param.nvjpeg_handle));
diff --git a/perception/autoware_tensorrt_bevdet/src/postprocess.cu b/perception/autoware_tensorrt_bevdet/src/postprocess.cu
index 1963320fb6fbf..271730756f1bb 100644
--- a/perception/autoware_tensorrt_bevdet/src/postprocess.cu
+++ b/perception/autoware_tensorrt_bevdet/src/postprocess.cu
@@ -116,14 +116,15 @@ PostprocessGPU::PostprocessGPU(
 
   iou3d_nms.reset(new Iou3dNmsCuda(output_h, output_w, nms_thresh));
 
+  std::ostringstream oss;
   for (auto i = 0; i < nms_rescale_factor.size(); i++) {
-    std::cout << std::fixed << std::setprecision(2) << nms_rescale_factor[i];
-    if (i == nms_rescale_factor.size() - 1) {
-      std::cout << std::endl;
-    } else {
-      std::cout << ' ';
-    }
+    oss << std::fixed << std::setprecision(2) << nms_rescale_factor[i] << ' ';
   }
+  RCLCPP_INFO(
+        rclcpp::get_logger("PostprocessGPU"), 
+        "%s", 
+        oss.str().c_str() 
+  );
 }
 PostprocessGPU::~PostprocessGPU()
 {
diff --git a/perception/autoware_tensorrt_bevdet/src/preprocess_plugin.cu b/perception/autoware_tensorrt_bevdet/src/preprocess_plugin.cu
index 401bc889106fb..694b9a78bf6f6 100644
--- a/perception/autoware_tensorrt_bevdet/src/preprocess_plugin.cu
+++ b/perception/autoware_tensorrt_bevdet/src/preprocess_plugin.cu
@@ -202,7 +202,10 @@ int32_t PreprocessPlugin::enqueue(
 
       break;
     default:  // should NOT be here
-      std::cerr << "\tUnsupported datatype!" << std::endl;
+      RCLCPP_ERROR(
+        rclcpp::get_logger("PreprocessPlugin"),
+        "\tUnsupport datatype!"
+      );
   }
   return 0;
 }

From 38b1d8186afb71eb04f469e76fac0ece12193fac Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 20 Aug 2024 07:28:17 +0000
Subject: [PATCH 43/85] style(pre-commit): autofix

---
 .../autoware/tensorrt_bevdet/common.hpp       |  12 +-
 .../tensorrt_bevdet/nvjpegdecoder.hpp         |  12 +-
 .../src/alignbev_plugin.cu                    |   5 +-
 .../autoware_tensorrt_bevdet/src/bevdet.cpp   | 125 ++++--------------
 .../src/bevpool_plugin.cu                     |   5 +-
 .../src/cpu_jpegdecoder.cpp                   |  21 +--
 .../autoware_tensorrt_bevdet/src/data.cpp     |   7 +-
 .../src/gatherbev_plugin.cu                   |   5 +-
 .../autoware_tensorrt_bevdet/src/iou3d_nms.cu |  72 +++-------
 .../src/nvjpegdecoder.cpp                     |  11 +-
 .../src/postprocess.cu                        |   6 +-
 .../src/preprocess_plugin.cu                  |   5 +-
 12 files changed, 69 insertions(+), 217 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/common.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/common.hpp
index 4eaead125055a..966f52e3ce5be 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/common.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/common.hpp
@@ -14,6 +14,9 @@
 #ifndef AUTOWARE__TENSORRT_BEVDET__COMMON_HPP_
 #define AUTOWARE__TENSORRT_BEVDET__COMMON_HPP_
 
+#include "rclcpp/logger.hpp"
+#include "rclcpp/rclcpp.hpp"
+
 #include <NvInfer.h>
 #include <assert.h>
 #include <cuda_runtime.h>
@@ -23,8 +26,6 @@
 
 #include <iostream>
 #include <string>
-#include "rclcpp/logger.hpp"
-#include "rclcpp/rclcpp.hpp"
 
 typedef unsigned char uchar;
 
@@ -41,12 +42,7 @@ inline void GPUAssert(cudaError_t code, const char * file, int line, bool abort
 {
   if (code != cudaSuccess) {
     RCLCPP_ERROR(
-      rclcpp::get_logger("GPUAssert"), 
-      "GPUassert: %s %s %d",                     
-      cudaGetErrorString(code),                  
-      file,                                      
-      line                                       
-    );
+      rclcpp::get_logger("GPUAssert"), "GPUassert: %s %s %d", cudaGetErrorString(code), file, line);
     if (abort) exit(code);
   }
 };
diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/nvjpegdecoder.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/nvjpegdecoder.hpp
index 450ac3edfe6bd..b3c91f20bd1d3 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/nvjpegdecoder.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/nvjpegdecoder.hpp
@@ -46,12 +46,8 @@ inline void NvJpegAssert(nvjpegStatus_t code, const char * file, int line)
 {
   if (code != NVJPEG_STATUS_SUCCESS) {
     RCLCPP_ERROR(
-      rclcpp::get_logger("autoware_tensorrt_bevdet"), 
-      "NVJPEG failure: '#%d' at %s:%d", 
-      code,
-      file, 
-      line 
-    ); 
+      rclcpp::get_logger("autoware_tensorrt_bevdet"), "NVJPEG failure: '#%d' at %s:%d", code, file,
+      line);
     exit(1);
   }
 }
@@ -83,9 +79,7 @@ struct share_params
       fmt = (nvjpegOutputFormat_t)_fmt;
     } else {
       RCLCPP_WARN(
-        rclcpp::get_logger("autoware_tensorrt_bevdet"), 
-        "Unknown format! Auto switch BGR!" 
-      );
+        rclcpp::get_logger("autoware_tensorrt_bevdet"), "Unknown format! Auto switch BGR!");
       fmt = NVJPEG_OUTPUT_BGR;
     }
   }
diff --git a/perception/autoware_tensorrt_bevdet/src/alignbev_plugin.cu b/perception/autoware_tensorrt_bevdet/src/alignbev_plugin.cu
index a23a30fd29d04..0321bf103f049 100644
--- a/perception/autoware_tensorrt_bevdet/src/alignbev_plugin.cu
+++ b/perception/autoware_tensorrt_bevdet/src/alignbev_plugin.cu
@@ -217,10 +217,7 @@ int32_t AlignBEVPlugin::enqueue(
       }
       break;
     default:  // should NOT be here
-      RCLCPP_ERROR(
-        rclcpp::get_logger("AlignBEVPlugin"), 
-        "\tUnsupported datatype!"
-      );
+      RCLCPP_ERROR(rclcpp::get_logger("AlignBEVPlugin"), "\tUnsupported datatype!");
   }
 
   return 0;
diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
index d9e70666cbaa5..6b58cfdee763b 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
@@ -39,11 +39,7 @@ BEVDet::BEVDet(
   InitParams(config_file);
   if (n_img != N_img) {
     RCLCPP_ERROR(
-      rclcpp::get_logger("BEVDet"), 
-      "BEVDet need %d images, but given %d images!", 
-      N_img, 
-      n_img 
-    );
+      rclcpp::get_logger("BEVDet"), "BEVDet need %d images, but given %d images!", N_img, n_img);
   }
   auto start = high_resolution_clock::now();
 
@@ -58,23 +54,15 @@ BEVDet::BEVDet(
   auto end = high_resolution_clock::now();
   duration<float> t = end - start;
   RCLCPP_INFO(
-    rclcpp::get_logger("BEVDet"), 
-    "InitVewTransformer cost time : %.4lf ms",
-    t.count() * 1000 
-  );
+    rclcpp::get_logger("BEVDet"), "InitVewTransformer cost time : %.4lf ms", t.count() * 1000);
 
   if (access(engine_file.c_str(), F_OK) == 0) {
-    RCLCPP_INFO(
-      rclcpp::get_logger("BEVDet"), 
-      "Inference engine prepared."
-    );
+    RCLCPP_INFO(rclcpp::get_logger("BEVDet"), "Inference engine prepared.");
   } else {
     // onnx to engine
     RCLCPP_WARN(
-      rclcpp::get_logger("BEVDet"), 
-      "Could not find %s, try making TensorRT engine from onnx", 
-      engine_file.c_str() 
-    );
+      rclcpp::get_logger("BEVDet"), "Could not find %s, try making TensorRT engine from onnx",
+      engine_file.c_str());
     ExportEngine(onnx_file, engine_file);
   }
   InitEngine(engine_file);  // FIXME
@@ -253,11 +241,7 @@ void BEVDet::MallocDeviceMemory()
     CHECK_CUDA(cudaMalloc(&trt_buffer_dev[i], size));
   }
 
-  RCLCPP_INFO(
-    rclcpp::get_logger("BEVDet"), 
-    "img num binding : %d", 
-    trt_engine->getNbBindings() 
-  );
+  RCLCPP_INFO(rclcpp::get_logger("BEVDet"), "img num binding : %d", trt_engine->getNbBindings());
 
   post_buffer = reinterpret_cast<void **>(new void *[class_num_pre_task.size() * 6]);
   for (size_t i = 0; i < class_num_pre_task.size(); i++) {
@@ -395,16 +379,8 @@ void BEVDet::InitViewTransformer(
   interval_starts_ptr.reset(interval_starts_host_ptr);
   interval_lengths_ptr.reset(interval_lengths_host_ptr);
 
-  RCLCPP_INFO(
-    rclcpp::get_logger("BEVDet"), 
-    "valid_feat_num: %d", 
-     valid_feat_num
-  );
-  RCLCPP_INFO(
-    rclcpp::get_logger("BEVDet"), 
-    "unique_bev_num: %d", 
-    unique_bev_num
-  );
+  RCLCPP_INFO(rclcpp::get_logger("BEVDet"), "valid_feat_num: %d", valid_feat_num);
+  RCLCPP_INFO(rclcpp::get_logger("BEVDet"), "unique_bev_num: %d", unique_bev_num);
 }
 
 void print_dim(nvinfer1::Dims dim, std::string name)
@@ -414,11 +390,7 @@ void print_dim(nvinfer1::Dims dim, std::string name)
   for (auto i = 0; i < dim.nbDims; i++) {
     oss << dim.d[i] << ' ';
   }
-  RCLCPP_INFO(
-    rclcpp::get_logger("BEVDet"), 
-    "%s", 
-    oss.str().c_str() 
-  );
+  RCLCPP_INFO(rclcpp::get_logger("BEVDet"), "%s", oss.str().c_str());
 }
 
 int BEVDet::InitEngine(const std::string & engine_file)
@@ -428,19 +400,13 @@ int BEVDet::InitEngine(const std::string & engine_file)
   }
 
   if (trt_engine == nullptr) {
-    RCLCPP_ERROR(
-      rclcpp::get_logger("BEVDet"), 
-      "Failed to deserialize engine file!"
-    );
+    RCLCPP_ERROR(rclcpp::get_logger("BEVDet"), "Failed to deserialize engine file!");
     return EXIT_FAILURE;
   }
   trt_context = trt_engine->createExecutionContext();
 
   if (trt_context == nullptr) {
-    RCLCPP_ERROR(
-      rclcpp::get_logger("BEVDet"), 
-      "Failed to create TensorRT Execution Context!"
-    );
+    RCLCPP_ERROR(rclcpp::get_logger("BEVDet"), "Failed to create TensorRT Execution Context!");
     return EXIT_FAILURE;
   }
 
@@ -508,18 +474,10 @@ int BEVDet::DeserializeTRTEngine(
   for (int bi = 0; bi < engine->getNbBindings(); bi++) {
     if (engine->bindingIsInput(bi) == true) {
       RCLCPP_INFO(
-        rclcpp::get_logger("BEVDet"), 
-        "Binding %d (%s): Input.", 
-        bi, 
-        engine->getBindingName(bi) 
-      );
+        rclcpp::get_logger("BEVDet"), "Binding %d (%s): Input.", bi, engine->getBindingName(bi));
     } else {
       RCLCPP_INFO(
-        rclcpp::get_logger("BEVDet"), 
-        "Binding %d (%s): Output.", 
-        bi, 
-        engine->getBindingName(bi) 
-      );
+        rclcpp::get_logger("BEVDet"), "Binding %d (%s): Output.", bi, engine->getBindingName(bi));
     }
   }
   return EXIT_SUCCESS;
@@ -632,10 +590,7 @@ int BEVDet::DoInfer(
     cam_data.param.scene_token, cam_data.param.ego2global_rot, cam_data.param.ego2global_trans);
 
   if (!trt_context->executeV2(trt_buffer_dev)) {
-    RCLCPP_ERROR(
-      rclcpp::get_logger("BEVDet"), 
-      "BEVDet forward failing!" 
-    );
+    RCLCPP_ERROR(rclcpp::get_logger("BEVDet"), "BEVDet forward failing!");
   }
 
   adj_frame_ptr->saveFrameBuffer(
@@ -693,26 +648,17 @@ void BEVDet::ExportEngine(const std::string & onnxFile, const std::string & trtF
   nvonnxparser::IParser * parser = nvonnxparser::createParser(*network, g_logger);
 
   if (!parser->parseFromFile(onnxFile.c_str(), static_cast<int>(g_logger.reportable_severity))) {
-    RCLCPP_ERROR(
-      rclcpp::get_logger("BEVDet"), 
-      "Failed parsing .onnx file!" 
-    );
+    RCLCPP_ERROR(rclcpp::get_logger("BEVDet"), "Failed parsing .onnx file!");
     for (int i = 0; i < parser->getNbErrors(); ++i) {
       auto * error = parser->getError(i);
       RCLCPP_ERROR(
-        rclcpp::get_logger("BEVDet"),
-        "Error code: %d, Description: %s",
-        static_cast<int>(error->code()),
-        error->desc()
-      );
+        rclcpp::get_logger("BEVDet"), "Error code: %d, Description: %s",
+        static_cast<int>(error->code()), error->desc());
     }
 
     return;
   }
-  RCLCPP_INFO(
-    rclcpp::get_logger("BEVDet"), 
-    "Succeeded parsing .onnx file!" 
-  );
+  RCLCPP_INFO(rclcpp::get_logger("BEVDet"), "Succeeded parsing .onnx file!");
 
   for (size_t i = 0; i < min_shapes.size(); i++) {
     nvinfer1::ITensor * it = network->getInput(i);
@@ -724,51 +670,30 @@ void BEVDet::ExportEngine(const std::string & onnxFile, const std::string & trtF
 
   nvinfer1::IHostMemory * engineString = builder->buildSerializedNetwork(*network, *config);
   if (engineString == nullptr || engineString->size() == 0) {
-    RCLCPP_ERROR(
-      rclcpp::get_logger("BEVDet"), 
-      "Failed building serialized engine!" 
-    );
+    RCLCPP_ERROR(rclcpp::get_logger("BEVDet"), "Failed building serialized engine!");
     return;
   }
-  RCLCPP_INFO(
-    rclcpp::get_logger("BEVDet"), 
-    "Succeeded building serialized engine!" 
-  );
+  RCLCPP_INFO(rclcpp::get_logger("BEVDet"), "Succeeded building serialized engine!");
 
   nvinfer1::IRuntime * runtime{nvinfer1::createInferRuntime(g_logger)};
   engine = runtime->deserializeCudaEngine(engineString->data(), engineString->size());
   if (engine == nullptr) {
-    RCLCPP_ERROR(
-      rclcpp::get_logger("BEVDet"), 
-      "Failed building engine!" 
-    );
+    RCLCPP_ERROR(rclcpp::get_logger("BEVDet"), "Failed building engine!");
     return;
   }
-  RCLCPP_INFO(
-    rclcpp::get_logger("BEVDet"), 
-    "Succeeded building engine!" 
-  );
+  RCLCPP_INFO(rclcpp::get_logger("BEVDet"), "Succeeded building engine!");
 
   std::ofstream engineFile(trtFile, std::ios::binary);
   if (!engineFile) {
-    RCLCPP_ERROR(
-      rclcpp::get_logger("BEVDet"), 
-      "Failed opening file to write" 
-    );
+    RCLCPP_ERROR(rclcpp::get_logger("BEVDet"), "Failed opening file to write");
     return;
   }
   engineFile.write(static_cast<char *>(engineString->data()), engineString->size());
   if (engineFile.fail()) {
-    RCLCPP_ERROR(
-      rclcpp::get_logger("BEVDet"), 
-      "Failed saving .engine file!" 
-    );
+    RCLCPP_ERROR(rclcpp::get_logger("BEVDet"), "Failed saving .engine file!");
     return;
   }
-  RCLCPP_INFO(
-    rclcpp::get_logger("BEVDet"), 
-    "Succeeded saving .engine file!" 
-  );
+  RCLCPP_INFO(rclcpp::get_logger("BEVDet"), "Succeeded saving .engine file!");
 }
 
 BEVDet::~BEVDet()
diff --git a/perception/autoware_tensorrt_bevdet/src/bevpool_plugin.cu b/perception/autoware_tensorrt_bevdet/src/bevpool_plugin.cu
index ca11b679f67db..a498a2a1685ba 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevpool_plugin.cu
+++ b/perception/autoware_tensorrt_bevdet/src/bevpool_plugin.cu
@@ -219,10 +219,7 @@ int32_t BEVPoolPlugin::enqueue(
       }
       break;
     default:  // should NOT be here
-      RCLCPP_ERROR(
-        rclcpp::get_logger("BEVPoolPlugin"), 
-        "\tUnsupport datatype!"
-      );
+      RCLCPP_ERROR(rclcpp::get_logger("BEVPoolPlugin"), "\tUnsupport datatype!");
   }
   return 0;
 }
diff --git a/perception/autoware_tensorrt_bevdet/src/cpu_jpegdecoder.cpp b/perception/autoware_tensorrt_bevdet/src/cpu_jpegdecoder.cpp
index 295a00c392b81..c00296bc41e34 100644
--- a/perception/autoware_tensorrt_bevdet/src/cpu_jpegdecoder.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/cpu_jpegdecoder.cpp
@@ -42,35 +42,24 @@ int decode_jpeg(const std::vector<char> & buffer, uchar * output)
 
   if (setjmp(jerr.setjmp_buffer)) {
     RCLCPP_ERROR(
-      rclcpp::get_logger("decode_jpeg"), 
-      "Failed to decompress jpeg: %s", 
-      jerr.pub.jpeg_message_table[jerr.pub.msg_code] 
-    );
+      rclcpp::get_logger("decode_jpeg"), "Failed to decompress jpeg: %s",
+      jerr.pub.jpeg_message_table[jerr.pub.msg_code]);
     jpeg_destroy_decompress(&cinfo);
     return EXIT_FAILURE;
   }
   jpeg_create_decompress(&cinfo);
 
   if (buffer.size() == 0) {
-    RCLCPP_ERROR(
-      rclcpp::get_logger("decode_jpeg"),
-      "buffer size is 0"
-    );
+    RCLCPP_ERROR(rclcpp::get_logger("decode_jpeg"), "buffer size is 0");
     return EXIT_FAILURE;
   }
   jpeg_mem_src(&cinfo, reinterpret_cast<const uchar *>(buffer.data()), buffer.size());
   if (jpeg_read_header(&cinfo, TRUE) != JPEG_HEADER_OK) {
-    RCLCPP_ERROR(
-      rclcpp::get_logger("decode_jpeg"), 
-      "Failed to read jpeg header" 
-    );
+    RCLCPP_ERROR(rclcpp::get_logger("decode_jpeg"), "Failed to read jpeg header");
     return EXIT_FAILURE;
   }
   if (jpeg_start_decompress(&cinfo) != TRUE) {
-    RCLCPP_ERROR(
-      rclcpp::get_logger("decode_jpeg"), 
-      "Failed to start decompress" 
-    );
+    RCLCPP_ERROR(rclcpp::get_logger("decode_jpeg"), "Failed to start decompress");
     return EXIT_FAILURE;
   }
 
diff --git a/perception/autoware_tensorrt_bevdet/src/data.cpp b/perception/autoware_tensorrt_bevdet/src/data.cpp
index 0cca4263dbd60..9171dc7b65b84 100644
--- a/perception/autoware_tensorrt_bevdet/src/data.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/data.cpp
@@ -30,11 +30,8 @@ camParams::camParams(const YAML::Node & config, int n, std::vector<std::string>
 {
   if (static_cast<size_t>(n) != cams_name.size()) {
     RCLCPP_ERROR(
-      rclcpp::get_logger("camParams"), 
-      "Error! Need %d camera param, but given %zu camera names!", 
-      n,
-      cams_name.size()
-    );
+      rclcpp::get_logger("camParams"), "Error! Need %d camera param, but given %zu camera names!",
+      n, cams_name.size());
   }
   ego2global_rot = fromYamlQuater(config["ego2global_rotation"]);
   ego2global_trans = fromYamlTrans(config["ego2global_translation"]);
diff --git a/perception/autoware_tensorrt_bevdet/src/gatherbev_plugin.cu b/perception/autoware_tensorrt_bevdet/src/gatherbev_plugin.cu
index b62fe057c415f..e9fecf8cef37a 100644
--- a/perception/autoware_tensorrt_bevdet/src/gatherbev_plugin.cu
+++ b/perception/autoware_tensorrt_bevdet/src/gatherbev_plugin.cu
@@ -182,10 +182,7 @@ int32_t GatherBEVPlugin::enqueue(
         reinterpret_cast<__half *>(outputs[0]));
       break;
     default:  // should NOT be here
-      RCLCPP_ERROR(
-        rclcpp::get_logger("GatherBEVPlugin"), 
-        "\tUnsupport datatype!" 
-      );
+      RCLCPP_ERROR(rclcpp::get_logger("GatherBEVPlugin"), "\tUnsupport datatype!");
   }
   return 0;
 }
diff --git a/perception/autoware_tensorrt_bevdet/src/iou3d_nms.cu b/perception/autoware_tensorrt_bevdet/src/iou3d_nms.cu
index 314eff52681a9..fda6e0fce93f8 100644
--- a/perception/autoware_tensorrt_bevdet/src/iou3d_nms.cu
+++ b/perception/autoware_tensorrt_bevdet/src/iou3d_nms.cu
@@ -133,16 +133,11 @@ __device__ inline float box_overlap(const float * box_a, const float * box_b)
 #ifdef DEBUG
   RCLCPP_DEBUG(
     rclcpp::get_logger("iou3d_nms"),
-    "a: (%.3f, %.3f, %.3f, %.3f, %.3f), b: (%.3f, %.3f, %.3f, %.3f, %.3f)",
-    a_x1, a_y1, a_x2, a_y2, a_angle, 
-    b_x1, b_y1, b_x2, b_y2, b_angle 
-  );
+    "a: (%.3f, %.3f, %.3f, %.3f, %.3f), b: (%.3f, %.3f, %.3f, %.3f, %.3f)", a_x1, a_y1, a_x2, a_y2,
+    a_angle, b_x1, b_y1, b_x2, b_y2, b_angle);
   RCLCPP_DEBUG(
-    rclcpp::get_logger("iou3d_nms"),
-    "center a: (%.3f, %.3f), b: (%.3f, %.3f)",
-    center_a.x, center_a.y, 
-    center_b.x, center_b.y 
-  );
+    rclcpp::get_logger("iou3d_nms"), "center a: (%.3f, %.3f), b: (%.3f, %.3f)", center_a.x,
+    center_a.y, center_b.x, center_b.y);
 #endif
 
   Point box_a_corners[5];
@@ -164,23 +159,15 @@ __device__ inline float box_overlap(const float * box_a, const float * box_b)
   for (int k = 0; k < 4; k++) {
 #ifdef DEBUG
     RCLCPP_DEBUG(
-      rclcpp::get_logger("iou3d_nms"), 
-      "before corner %d: a(%.3f, %.3f), b(%.3f, %.3f)",
-      k,                           
-      box_a_corners[k].x, box_a_corners[k].y,  
-      box_b_corners[k].x, box_b_corners[k].y  
-    );
+      rclcpp::get_logger("iou3d_nms"), "before corner %d: a(%.3f, %.3f), b(%.3f, %.3f)", k,
+      box_a_corners[k].x, box_a_corners[k].y, box_b_corners[k].x, box_b_corners[k].y);
 #endif
     rotate_around_center(center_a, a_angle_cos, a_angle_sin, box_a_corners[k]);
     rotate_around_center(center_b, b_angle_cos, b_angle_sin, box_b_corners[k]);
 #ifdef DEBUG
     RCLCPP_DEBUG(
-      rclcpp::get_logger("iou3d_nms"),
-      "corner %d: a(%.3f, %.3f), b(%.3f, %.3f)",
-      k,                           
-      box_a_corners[k].x, box_a_corners[k].y,  
-      box_b_corners[k].x, box_b_corners[k].y  
-    );
+      rclcpp::get_logger("iou3d_nms"), "corner %d: a(%.3f, %.3f), b(%.3f, %.3f)", k,
+      box_a_corners[k].x, box_a_corners[k].y, box_b_corners[k].x, box_b_corners[k].y);
 #endif
   }
 
@@ -203,14 +190,11 @@ __device__ inline float box_overlap(const float * box_a, const float * box_b)
         cnt++;
 #ifdef DEBUG
         RCLCPP_DEBUG(
-          rclcpp::get_logger("iou3d_nms"), 
-          "Cross points (%.3f, %.3f): a(%.3f, %.3f)->(%.3f, %.3f), b(%.3f, %.3f)->(%.3f, %.3f)", 
-          cross_points[cnt - 1].x, cross_points[cnt - 1].y,  
-          box_a_corners[i].x, box_a_corners[i].y,           
-          box_a_corners[i + 1].x, box_a_corners[i + 1].y,   
-          box_b_corners[i].x, box_b_corners[i].y,          
-          box_b_corners[i + 1].x, box_b_corners[i + 1].y 
-        );
+          rclcpp::get_logger("iou3d_nms"),
+          "Cross points (%.3f, %.3f): a(%.3f, %.3f)->(%.3f, %.3f), b(%.3f, %.3f)->(%.3f, %.3f)",
+          cross_points[cnt - 1].x, cross_points[cnt - 1].y, box_a_corners[i].x, box_a_corners[i].y,
+          box_a_corners[i + 1].x, box_a_corners[i + 1].y, box_b_corners[i].x, box_b_corners[i].y,
+          box_b_corners[i + 1].x, box_b_corners[i + 1].y);
 #endif
       }
     }
@@ -224,10 +208,8 @@ __device__ inline float box_overlap(const float * box_a, const float * box_b)
       cnt++;
 #ifdef DEBUG
       RCLCPP_DEBUG(
-        rclcpp::get_logger("iou3d_nms"), 
-        "b corners in a: corner_b(%.3f, %.3f)", 
-        cross_points[cnt - 1].x, cross_points[cnt - 1].y 
-      );
+        rclcpp::get_logger("iou3d_nms"), "b corners in a: corner_b(%.3f, %.3f)",
+        cross_points[cnt - 1].x, cross_points[cnt - 1].y);
 #endif
     }
     if (check_in_box2d(box_b, box_a_corners[k])) {
@@ -236,10 +218,8 @@ __device__ inline float box_overlap(const float * box_a, const float * box_b)
       cnt++;
 #ifdef DEBUG
       RCLCPP_DEBUG(
-        rclcpp::get_logger("iou3d_nms"), 
-        "a corners in b: corner_a(%.3f, %.3f)", 
-        cross_points[cnt - 1].x, cross_points[cnt - 1].y 
-      );
+        rclcpp::get_logger("iou3d_nms"), "a corners in b: corner_a(%.3f, %.3f)",
+        cross_points[cnt - 1].x, cross_points[cnt - 1].y);
 #endif
     }
   }
@@ -260,18 +240,11 @@ __device__ inline float box_overlap(const float * box_a, const float * box_b)
   }
 
 #ifdef DEBUG
-  RCLCPP_DEBUG(
-    rclcpp::get_logger("iou3d_nms"), 
-    "cnt=%d", 
-    cnt 
-  );
+  RCLCPP_DEBUG(rclcpp::get_logger("iou3d_nms"), "cnt=%d", cnt);
   for (int i = 0; i < cnt; i++) {
     RCLCPP_DEBUG(
-      rclcpp::get_logger("iou3d_nms"), 
-      "All cross point %d: (%.3f, %.3f)", 
-      i,                             
-      cross_points[i].x, cross_points[i].y 
-    );
+      rclcpp::get_logger("iou3d_nms"), "All cross point %d: (%.3f, %.3f)", i, cross_points[i].x,
+      cross_points[i].y);
   }
 #endif
 
@@ -654,10 +627,7 @@ int Iou3dNmsCuda::DoIou3dNms(
 
   if (cudaSuccess != cudaGetLastError()) {
     RCLCPP_ERROR(
-        rclcpp::get_logger("iou3d_nms"), 
-        "Error: %s", 
-        cudaGetErrorString(cudaGetLastError()) 
-    );
+      rclcpp::get_logger("iou3d_nms"), "Error: %s", cudaGetErrorString(cudaGetLastError()));
   }
   return num_to_keep;
 }
diff --git a/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp b/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp
index ed467db2b8a74..19a7c168b6201 100644
--- a/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp
@@ -238,10 +238,8 @@ int nvjpegDecoder::decode(const std::vector<std::vector<char>> & files_data, uch
   duration<double> decode_time = decode_end - decode_start;
 
   RCLCPP_INFO(
-    rclcpp::get_logger("nvjpegDecoder"), 
-    "Decode total time : %.4lf ms",
-    decode_time.count() * 1000
-  );
+    rclcpp::get_logger("nvjpegDecoder"), "Decode total time : %.4lf ms",
+    decode_time.count() * 1000);
 
   for (size_t i = 0; i < files_data.size(); i++) {
     get_img(
@@ -267,9 +265,8 @@ int nvjpegDecoder::init()
   share_param.hw_decode_available = true;
   if (status == NVJPEG_STATUS_ARCH_MISMATCH) {
     RCLCPP_WARN(
-      rclcpp::get_logger("nvjpegDecoder"), 
-      "Hardware Decoder not supported. Falling back to default backend"
-    );
+      rclcpp::get_logger("nvjpegDecoder"),
+      "Hardware Decoder not supported. Falling back to default backend");
     CHECK_NVJPEG(nvjpegCreateEx(
       NVJPEG_BACKEND_DEFAULT, &dev_allocator, &pinned_allocator, NVJPEG_FLAGS_DEFAULT,
       &share_param.nvjpeg_handle));
diff --git a/perception/autoware_tensorrt_bevdet/src/postprocess.cu b/perception/autoware_tensorrt_bevdet/src/postprocess.cu
index 271730756f1bb..009c2bf279c68 100644
--- a/perception/autoware_tensorrt_bevdet/src/postprocess.cu
+++ b/perception/autoware_tensorrt_bevdet/src/postprocess.cu
@@ -120,11 +120,7 @@ PostprocessGPU::PostprocessGPU(
   for (auto i = 0; i < nms_rescale_factor.size(); i++) {
     oss << std::fixed << std::setprecision(2) << nms_rescale_factor[i] << ' ';
   }
-  RCLCPP_INFO(
-        rclcpp::get_logger("PostprocessGPU"), 
-        "%s", 
-        oss.str().c_str() 
-  );
+  RCLCPP_INFO(rclcpp::get_logger("PostprocessGPU"), "%s", oss.str().c_str());
 }
 PostprocessGPU::~PostprocessGPU()
 {
diff --git a/perception/autoware_tensorrt_bevdet/src/preprocess_plugin.cu b/perception/autoware_tensorrt_bevdet/src/preprocess_plugin.cu
index 694b9a78bf6f6..25abf687aaae8 100644
--- a/perception/autoware_tensorrt_bevdet/src/preprocess_plugin.cu
+++ b/perception/autoware_tensorrt_bevdet/src/preprocess_plugin.cu
@@ -202,10 +202,7 @@ int32_t PreprocessPlugin::enqueue(
 
       break;
     default:  // should NOT be here
-      RCLCPP_ERROR(
-        rclcpp::get_logger("PreprocessPlugin"),
-        "\tUnsupport datatype!"
-      );
+      RCLCPP_ERROR(rclcpp::get_logger("PreprocessPlugin"), "\tUnsupport datatype!");
   }
   return 0;
 }

From 87b040f14749aa42ae9cbd4dc6bbcd60e7d045ac Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Tue, 20 Aug 2024 15:44:14 +0800
Subject: [PATCH 44/85] fix cppcheck ci

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 perception/autoware_tensorrt_bevdet/src/bevdet.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
index 6b58cfdee763b..9a264dc3a7a6a 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
@@ -383,7 +383,7 @@ void BEVDet::InitViewTransformer(
   RCLCPP_INFO(rclcpp::get_logger("BEVDet"), "unique_bev_num: %d", unique_bev_num);
 }
 
-void print_dim(nvinfer1::Dims dim, std::string name)
+void print_dim(nvinfer1::Dims dim, const std::string & name)
 {
   std::ostringstream oss;
   oss << name << " : ";

From de1da9f5db9aa0af854688f77b2cf3893f9398d6 Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Wed, 21 Aug 2024 11:11:04 +0800
Subject: [PATCH 45/85] fix: use english comment

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 .../include/autoware/tensorrt_bevdet/bevdet_node.hpp            | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
index 242139eb07849..1f414f7598398 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
@@ -81,7 +81,7 @@ class TRTBEVDetNode : public rclcpp::Node
   float score_thre_;
 
   rclcpp::Publisher<autoware_perception_msgs::msg::DetectedObjects>::SharedPtr
-    pub_boxes_;  // ros2无该消息类型
+    pub_boxes_; 
 
   message_filters::Subscriber<sensor_msgs::msg::Image> sub_f_img_;
   message_filters::Subscriber<sensor_msgs::msg::Image> sub_fl_img_;

From 399b7f345e279dec48223f49e862f7282b51e140 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 21 Aug 2024 03:13:27 +0000
Subject: [PATCH 46/85] style(pre-commit): autofix

---
 .../include/autoware/tensorrt_bevdet/bevdet_node.hpp           | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
index 1f414f7598398..5889f97deef0d 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
@@ -80,8 +80,7 @@ class TRTBEVDetNode : public rclcpp::Node
   uchar * imgs_dev_ = nullptr;
   float score_thre_;
 
-  rclcpp::Publisher<autoware_perception_msgs::msg::DetectedObjects>::SharedPtr
-    pub_boxes_; 
+  rclcpp::Publisher<autoware_perception_msgs::msg::DetectedObjects>::SharedPtr pub_boxes_;
 
   message_filters::Subscriber<sensor_msgs::msg::Image> sub_f_img_;
   message_filters::Subscriber<sensor_msgs::msg::Image> sub_fl_img_;

From d68439950fcd3971446bcce01b7c0214b5e37d61 Mon Sep 17 00:00:00 2001
From: Yuxuan Liu <619684051@qq.com>
Date: Sun, 25 Aug 2024 13:46:30 +0900
Subject: [PATCH 47/85] using RCLCPP_DEBUG instead of print

Signed-off-by: Yuxuan Liu <619684051@qq.com>
---
 perception/autoware_tensorrt_bevdet/src/bevdet.cpp | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
index 9a264dc3a7a6a..c1ff893cc4f22 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
@@ -576,7 +576,7 @@ void BEVDet::GetCurr2AdjTransform(
 int BEVDet::DoInfer(
   const camsData & cam_data, std::vector<Box> & out_detections, float & cost_time, int idx)
 {
-  printf("-------------------%d-------------------\n", idx + 1);
+  RCLCPP_DEBUG(rclcpp::get_logger("BEVDet"), "---------%d---------", idx + 1);
 
   auto start = high_resolution_clock::now();
   CHECK_CUDA(cudaMemcpy(
@@ -609,11 +609,10 @@ int BEVDet::DoInfer(
   duration<double> post_t = post_end - end;
 
   cost_time = infer_t.count() * 1000;
-  printf("TRT-Engine  : %.5lf ms\n", engine_t.count() * 1000);
-  printf("Postprocess : %.5lf ms\n", post_t.count() * 1000);
-  printf("Inference   : %.5lf ms\n", infer_t.count() * 1000);
-
-  printf("Detect %ld objects\n", out_detections.size());
+  RCLCPP_DEBUG(rclcpp::get_logger("BEVDet"), "Inference time: %.5lf ms", engine_t.count() * 1000);
+  RCLCPP_DEBUG(rclcpp::get_logger("BEVDet"), "Postprocess time: %.5lf ms", post_t.count() * 1000);
+  RCLCPP_DEBUG(rclcpp::get_logger("BEVDet"), "Total time: %.5lf ms", infer_t.count() * 1000);
+  RCLCPP_DEBUG(rclcpp::get_logger("BEVDet"), "Detect %ld objects", out_detections.size());
 
   return EXIT_SUCCESS;
 }

From cf8dfff4f8d5bf4611df7f6a1f6843d8dbea743b Mon Sep 17 00:00:00 2001
From: Yuxuan Liu <619684051@qq.com>
Date: Sun, 25 Aug 2024 22:52:47 +0900
Subject: [PATCH 48/85] using cameara info and TF

Signed-off-by: Yuxuan Liu <619684051@qq.com>
---
 .../autoware/tensorrt_bevdet/bevdet_node.hpp  |  43 ++++++-
 .../include/autoware/tensorrt_bevdet/data.hpp |   7 ++
 .../src/bevdet_node.cpp                       | 118 ++++++++++++++++--
 3 files changed, 156 insertions(+), 12 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
index 5889f97deef0d..115746c31df3d 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
@@ -16,7 +16,11 @@
 
 #include "bevdet.hpp"
 #include "cpu_jpegdecoder.hpp"
+#include "tf2_ros/buffer.h"
+#include "tf2_ros/transform_listener.h"
 
+#include <Eigen/Core>
+#include <Eigen/Geometry>
 #include <opencv2/opencv.hpp>
 
 #include <autoware_perception_msgs/msg/detected_object_kinematics.hpp>
@@ -26,6 +30,7 @@
 #include <geometry_msgs/msg/point32.hpp>
 #include <geometry_msgs/msg/quaternion.hpp>
 #include <geometry_msgs/msg/vector3.hpp>
+#include <sensor_msgs/msg/camera_info.hpp>
 #include <sensor_msgs/msg/image.hpp>
 #include <sensor_msgs/msg/point_cloud2.hpp>
 #include <tf2_geometry_msgs/tf2_geometry_msgs.hpp>
@@ -57,6 +62,13 @@ void box3DToDetectedObjects(
   const std::vector<Box> & boxes, autoware_perception_msgs::msg::DetectedObjects & objects,
   const std::vector<std::string> & class_names, float score_thre, const bool has_twist);
 
+void getTransform(
+  const geometry_msgs::msg::TransformStamped & transform, Eigen::Quaternion<float> & rot,
+  Eigen::Translation3f & translation);
+
+void getCameraIntrinsics(
+  const sensor_msgs::msg::CameraInfo::SharedPtr msg, Eigen::Matrix3f & intrinsics);
+
 class TRTBEVDetNode : public rclcpp::Node
 {
 private:
@@ -81,7 +93,26 @@ class TRTBEVDetNode : public rclcpp::Node
   float score_thre_;
 
   rclcpp::Publisher<autoware_perception_msgs::msg::DetectedObjects>::SharedPtr pub_boxes_;
-
+  // Subscribers of camera info for each camera, no need to synchonrize
+  rclcpp::Subscription<sensor_msgs::msg::CameraInfo>::SharedPtr sub_f_caminfo_;
+  rclcpp::Subscription<sensor_msgs::msg::CameraInfo>::SharedPtr sub_fl_caminfo_;
+  rclcpp::Subscription<sensor_msgs::msg::CameraInfo>::SharedPtr sub_fr_caminfo_;
+  rclcpp::Subscription<sensor_msgs::msg::CameraInfo>::SharedPtr sub_b_caminfo_;
+  rclcpp::Subscription<sensor_msgs::msg::CameraInfo>::SharedPtr sub_bl_caminfo_;
+  rclcpp::Subscription<sensor_msgs::msg::CameraInfo>::SharedPtr sub_br_caminfo_;
+  std::vector<bool> caminfo_received_;
+  bool camera_info_received_flag_ = false;
+
+  // tf listener
+  std::shared_ptr<tf2_ros::TransformListener> tf_listener_{nullptr};
+  std::unique_ptr<tf2_ros::Buffer> tf_buffer_;
+
+  // Camera parameters;
+  std::vector<Eigen::Matrix3f> cams_intrin;
+  std::vector<Eigen::Quaternion<float>> cams2ego_rot;
+  std::vector<Eigen::Translation3f> cams2ego_trans;
+
+  // Subscribers of images for each camera, synchonrized
   message_filters::Subscriber<sensor_msgs::msg::Image> sub_f_img_;
   message_filters::Subscriber<sensor_msgs::msg::Image> sub_fl_img_;
   message_filters::Subscriber<sensor_msgs::msg::Image> sub_fr_img_;
@@ -97,6 +128,14 @@ class TRTBEVDetNode : public rclcpp::Node
   typedef message_filters::Synchronizer<MySyncPolicy> Sync;
   std::shared_ptr<Sync> sync_;
 
+  // Timer for checking initialization
+  rclcpp::TimerBase::SharedPtr timer_;
+  void initParameters();
+  void startCameraInfoSubscription();
+  void checkInitialization();
+  void initModel();
+  void startImageSubscription();
+
 public:
   TRTBEVDetNode(const std::string & node_name, const rclcpp::NodeOptions & options);
   ~TRTBEVDetNode();
@@ -108,6 +147,8 @@ class TRTBEVDetNode : public rclcpp::Node
     const sensor_msgs::msg::Image::ConstSharedPtr & msg_bl_img,
     const sensor_msgs::msg::Image::ConstSharedPtr & msg_b_img,
     const sensor_msgs::msg::Image::ConstSharedPtr & msg_br_img);
+
+  void camera_info_callback(int idx, const sensor_msgs::msg::CameraInfo::SharedPtr msg);
 };
 
 #endif  // AUTOWARE__TENSORRT_BEVDET__BEVDET_NODE_HPP_
diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/data.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/data.hpp
index bfd75f1ebe191..25c6708f91583 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/data.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/data.hpp
@@ -29,6 +29,13 @@ struct camParams
 {
   camParams() = default;
   camParams(const YAML::Node & config, int n, std::vector<std::string> & cams_name);
+  camParams(
+    const std::vector<Eigen::Matrix3f> & _cams_intrin,
+    const std::vector<Eigen::Quaternion<float>> & _cams2ego_rot,
+    const std::vector<Eigen::Translation3f> & _cams2ego_trans)
+  : cams_intrin(_cams_intrin), cams2ego_rot(_cams2ego_rot), cams2ego_trans(_cams2ego_trans)
+  {
+  }
 
   int N_img;
 
diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
index d6124026b4302..6cab819808348 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
@@ -95,29 +95,53 @@ void box3DToDetectedObjects(
   }
 }
 
+void getTransform(
+  const geometry_msgs::msg::TransformStamped & transform, Eigen::Quaternion<float> & rot,
+  Eigen::Translation3f & translation)
+{
+  rot = Eigen::Quaternion<float>(
+    transform.transform.rotation.w, transform.transform.rotation.x, transform.transform.rotation.y,
+    transform.transform.rotation.z);
+  translation = Eigen::Translation3f(
+    transform.transform.translation.x, transform.transform.translation.y,
+    transform.transform.translation.z);
+}
+
+void getCameraIntrinsics(
+  const sensor_msgs::msg::CameraInfo::SharedPtr msg, Eigen::Matrix3f & intrinsics)
+{
+  intrinsics << msg->k[0], msg->k[1], msg->k[2], msg->k[3], msg->k[4], msg->k[5], msg->k[6],
+    msg->k[7], msg->k[8];
+}
+
 TRTBEVDetNode::TRTBEVDetNode(
   const std::string & node_name, const rclcpp::NodeOptions & node_options)
 : rclcpp::Node(node_name, node_options)
 {
-  using std::placeholders::_1;
-  using std::placeholders::_2;
-  using std::placeholders::_3;
-  using std::placeholders::_4;
-  using std::placeholders::_5;
-  using std::placeholders::_6;
-
-  score_thre_ = this->declare_parameter<float>("post_process_params.score_threshold", 0.2);
-
   img_N_ = this->declare_parameter<int>("data_params.N", 6);     // camera num 6
   img_w_ = this->declare_parameter<int>("data_params.W", 1600);  // W: 1600
   img_h_ = this->declare_parameter<int>("data_params.H", 900);   // H: 900
 
+  caminfo_received_ = std::vector<bool>(img_N_, false);
+  cams_intrin = std::vector<Eigen::Matrix3f>(img_N_);
+  cams2ego_rot = std::vector<Eigen::Quaternion<float>>(img_N_);
+  cams2ego_trans = std::vector<Eigen::Translation3f>(img_N_);
+
+  startCameraInfoSubscription();
+  timer_ = this->create_wall_timer(
+    std::chrono::milliseconds(100), std::bind(&TRTBEVDetNode::checkInitialization, this));
+}
+
+void TRTBEVDetNode::initModel()
+{
+  score_thre_ = this->declare_parameter<float>("post_process_params.score_threshold", 0.2);
+
   model_config_ = this->declare_parameter("model_config", "bevdet_r50_4dlongterm_depth.yaml");
 
   onnx_file_ = this->declare_parameter("onnx_file", "bevdet_one_lt_d.onnx");
   engine_file_ = this->declare_parameter("engine_file", "bevdet_one_lt_d.engine");
 
-  camconfig_ = YAML::LoadFile(this->declare_parameter("cam_config", "cams_param.yaml"));
+  // camconfig_ = YAML::LoadFile(this->declare_parameter("cam_config", "cams_param.yaml"));
 
   imgs_name_ = this->declare_parameter<std::vector<std::string>>("data_params.cams");
   class_names_ =
@@ -125,7 +149,8 @@ TRTBEVDetNode::TRTBEVDetNode(
 
   RCLCPP_INFO_STREAM(this->get_logger(), "Successful load config!");
 
-  sampleData_.param = camParams(camconfig_, img_N_, imgs_name_);
+  // sampleData_.param = camParams(camconfig_, img_N_, imgs_name_);
+  sampleData_.param = camParams(cams_intrin, cams2ego_rot, cams2ego_trans);
 
   RCLCPP_INFO_STREAM(this->get_logger(), "Successful load image params!");
 
@@ -140,6 +165,29 @@ TRTBEVDetNode::TRTBEVDetNode(
 
   pub_boxes_ = this->create_publisher<autoware_perception_msgs::msg::DetectedObjects>(
     "~/output/boxes", rclcpp::QoS{1});
+}
+
+void TRTBEVDetNode::checkInitialization()
+{
+  if (camera_info_received_flag_) {
+    RCLCPP_INFO_STREAM(
+      this->get_logger(), "Camera Info and TF Transform Initialization completed!");
+    initModel();
+    startImageSubscription();
+    timer_->cancel();
+    timer_.reset();
+    timer_ = nullptr;
+  }
+}
+
+void TRTBEVDetNode::startImageSubscription()
+{
+  using std::placeholders::_1;
+  using std::placeholders::_2;
+  using std::placeholders::_3;
+  using std::placeholders::_4;
+  using std::placeholders::_5;
+  using std::placeholders::_6;
 
   sub_f_img_.subscribe(
     this, "~/input/topic_img_f",
@@ -158,6 +206,35 @@ TRTBEVDetNode::TRTBEVDetNode(
   sync_->registerCallback(std::bind(&TRTBEVDetNode::callback, this, _1, _2, _3, _4, _5, _6));
 }
 
+void TRTBEVDetNode::startCameraInfoSubscription()
+{
+  // cams: ["CAM_FRONT_LEFT", "CAM_FRONT", "CAM_FRONT_RIGHT", "CAM_BACK_LEFT", "CAM_BACK",
+  // "CAM_BACK_RIGHT"]
+  sub_fl_caminfo_ = this->create_subscription<sensor_msgs::msg::CameraInfo>(
+    "~/input/topic_img_fl/camera_info", rclcpp::QoS{1},
+    [this](const sensor_msgs::msg::CameraInfo::SharedPtr msg) { camera_info_callback(0, msg); });
+
+  sub_f_caminfo_ = this->create_subscription<sensor_msgs::msg::CameraInfo>(
+    "~/input/topic_img_f/camera_info", rclcpp::QoS{1},
+    [this](const sensor_msgs::msg::CameraInfo::SharedPtr msg) { camera_info_callback(1, msg); });
+
+  sub_fr_caminfo_ = this->create_subscription<sensor_msgs::msg::CameraInfo>(
+    "~/input/topic_img_fr/camera_info", rclcpp::QoS{1},
+    [this](const sensor_msgs::msg::CameraInfo::SharedPtr msg) { camera_info_callback(2, msg); });
+
+  sub_bl_caminfo_ = this->create_subscription<sensor_msgs::msg::CameraInfo>(
+    "~/input/topic_img_bl/camera_info", rclcpp::QoS{1},
+    [this](const sensor_msgs::msg::CameraInfo::SharedPtr msg) { camera_info_callback(3, msg); });
+
+  sub_b_caminfo_ = this->create_subscription<sensor_msgs::msg::CameraInfo>(
+    "~/input/topic_img_b/camera_info", rclcpp::QoS{1},
+    [this](const sensor_msgs::msg::CameraInfo::SharedPtr msg) { camera_info_callback(4, msg); });
+
+  sub_br_caminfo_ = this->create_subscription<sensor_msgs::msg::CameraInfo>(
+    "~/input/topic_img_br/camera_info", rclcpp::QoS{1},
+    [this](const sensor_msgs::msg::CameraInfo::SharedPtr msg) { camera_info_callback(5, msg); });
+}
+
 void image_transport(std::vector<cv::Mat> imgs, uchar * out_imgs, size_t width, size_t height)
 {
   uchar * temp = new uchar[width * height * 3];
@@ -217,6 +294,25 @@ void TRTBEVDetNode::callback(
   pub_boxes_->publish(bevdet_objects);
 }
 
+void TRTBEVDetNode::camera_info_callback(int idx, const sensor_msgs::msg::CameraInfo::SharedPtr msg)
+{
+  Eigen::Matrix3f intrinsics;
+  getCameraIntrinsics(msg, intrinsics);
+  cams_intrin[idx] = intrinsics;
+
+  Eigen::Quaternion<float> rot;
+  Eigen::Translation3f translation;
+  getTransform(
+    tf_buffer_->lookupTransform("base_link", msg->header.frame_id, rclcpp::Time(0)), rot,
+    translation);
+  cams2ego_rot[idx] = rot;
+  cams2ego_trans[idx] = translation;
+
+  caminfo_received_[idx] = true;
+  camera_info_received_flag_ =
+    std::all_of(caminfo_received_.begin(), caminfo_received_.end(), [](bool i) { return i; });
+}
+
 TRTBEVDetNode::~TRTBEVDetNode()
 {
   delete imgs_dev_;

From d24a4239ccb43753f1b40ca6d32c31d5aeb92c9d Mon Sep 17 00:00:00 2001
From: YuxuanLiuTier4Desktop <619684051@qq.com>
Date: Mon, 26 Aug 2024 10:35:00 +0900
Subject: [PATCH 49/85] debug and make it runnable with ROS2 nuscenes dataset

Signed-off-by: YuxuanLiuTier4Desktop <619684051@qq.com>
---
 .../launch/tensorrt_bevdet.launch.xml         | 28 +++++++++++++------
 .../src/bevdet_node.cpp                       | 13 ++++++++-
 2 files changed, 32 insertions(+), 9 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml b/perception/autoware_tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml
index 6f9fafedb86ea..601b71cfefbe3 100644
--- a/perception/autoware_tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml
+++ b/perception/autoware_tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml
@@ -1,18 +1,24 @@
 <?xml version="1.0"?>
 <launch>
   <!-- input topic name -->
-  <arg name="input/img_fl" default="/cam_front_left/raw"/>
-  <arg name="input/img_f" default="/cam_front/raw"/>
-  <arg name="input/img_fr" default="/cam_front_right/raw"/>
-  <arg name="input/img_bl" default="/cam_back_left/raw"/>
-  <arg name="input/img_b" default="/cam_back/raw"/>
-  <arg name="input/img_br" default="/cam_back_right/raw"/>
+  <arg name="input/img_fl" default="/nuscenes/CAM_FRONT_LEFT/image"/>
+  <arg name="input/img_f" default="/nuscenes/CAM_FRONT/image"/>
+  <arg name="input/img_fr" default="/nuscenes/CAM_FRONT_RIGHT/image"/>
+  <arg name="input/img_bl" default="/nuscenes/CAM_BACK_LEFT/image"/>
+  <arg name="input/img_b" default="/nuscenes/CAM_BACK/image"/>
+  <arg name="input/img_br" default="/nuscenes/CAM_BACK_RIGHT/image"/>
   <arg name="output/boxes" default="boxes"/>
 
+  <arg name="input/img_fl/camera_info" default="/nuscenes/CAM_FRONT_LEFT/camera_info"/>
+  <arg name="input/img_f/camera_info" default="/nuscenes/CAM_FRONT/camera_info"/>
+  <arg name="input/img_fr/camera_info" default="/nuscenes/CAM_FRONT_RIGHT/camera_info"/>
+  <arg name="input/img_bl/camera_info" default="/nuscenes/CAM_BACK_LEFT/camera_info"/>
+  <arg name="input/img_b/camera_info" default="/nuscenes/CAM_BACK/camera_info"/>
+  <arg name="input/img_br/camera_info" default="/nuscenes/CAM_BACK_RIGHT/camera_info"/>
+
   <arg name="data_path" default="$(env HOME)/autoware_data/autoware_tensorrt_bevdet"/>
   <arg name="onnx_file" default="$(var data_path)/bevdet_one_lt_d.onnx"/>
   <arg name="engine_file" default="$(var data_path)/bevdet_one_lt_d.engine"/>
-  <arg name="cam_config" default="$(find-pkg-share autoware_tensorrt_bevdet)/config/cams_param.yaml"/>
   <arg name="model_config" default="$(find-pkg-share autoware_tensorrt_bevdet)/config/bevdet_r50_4dlongterm_depth.yaml"/>
   <arg name="param_file" default="$(find-pkg-share autoware_tensorrt_bevdet)/config/bevdet.param.yaml"/>
 
@@ -26,11 +32,17 @@
     <remap from="~/input/topic_img_b" to="$(var input/img_b)"/>
     <remap from="~/input/topic_img_br" to="$(var input/img_br)"/>
 
+    <remap from="~/input/topic_img_fl/camera_info" to="$(var input/img_fl/camera_info)"/>
+    <remap from="~/input/topic_img_f/camera_info" to="$(var input/img_f/camera_info)"/>
+    <remap from="~/input/topic_img_fr/camera_info" to="$(var input/img_fr/camera_info)"/>
+    <remap from="~/input/topic_img_bl/camera_info" to="$(var input/img_bl/camera_info)"/>
+    <remap from="~/input/topic_img_b/camera_info" to="$(var input/img_b/camera_info)"/>
+    <remap from="~/input/topic_img_br/camera_info" to="$(var input/img_br/camera_info)"/>
+
     <remap from="~/output/boxes" to="$(var output/boxes)"/>
 
     <param name="onnx_file" value="$(var onnx_file)"/>
     <param name="engine_file" value="$(var engine_file)"/>
-    <param name="cam_config" value="$(var cam_config)"/>
     <param name="model_config" value="$(var model_config)"/>
     <param from="$(var param_file)"/>
   </node>
diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
index 6cab819808348..a4b3bad296955 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
@@ -127,6 +127,11 @@ TRTBEVDetNode::TRTBEVDetNode(
   cams2ego_rot = std::vector<Eigen::Quaternion<float>>(img_N_);
   cams2ego_trans = std::vector<Eigen::Translation3f>(img_N_);
 
+  tf_buffer_ =
+      std::make_unique<tf2_ros::Buffer>(this->get_clock());
+  tf_listener_ =
+      std::make_shared<tf2_ros::TransformListener>(*tf_buffer_);
+
   startCameraInfoSubscription();
   timer_ = this->create_wall_timer(
     std::chrono::milliseconds(100), std::bind(&TRTBEVDetNode::checkInitialization, this));
@@ -176,7 +181,11 @@ void TRTBEVDetNode::checkInitialization()
     startImageSubscription();
     timer_->cancel();
     timer_.reset();
-    timer_ = nullptr;
+  }
+  else {
+    RCLCPP_INFO_THROTTLE(
+      this->get_logger(), *this->get_clock(), 5000,
+      "Waiting for Camera Info and TF Transform Initialization...");
   }
 }
 
@@ -296,6 +305,8 @@ void TRTBEVDetNode::callback(
 
 void TRTBEVDetNode::camera_info_callback(int idx, const sensor_msgs::msg::CameraInfo::SharedPtr msg)
 {
+  if (caminfo_received_[idx]) return; // already received;  not expected to modify because of we init the model only once
+
   Eigen::Matrix3f intrinsics;
   getCameraIntrinsics(msg, intrinsics);
   cams_intrin[idx] = intrinsics;

From f4e3a7fe95e57afc2239093379b42c9c6513b09e Mon Sep 17 00:00:00 2001
From: YuxuanLiuTier4Desktop <619684051@qq.com>
Date: Mon, 26 Aug 2024 10:47:16 +0900
Subject: [PATCH 50/85] fix documents and clean up

Signed-off-by: YuxuanLiuTier4Desktop <619684051@qq.com>
---
 perception/autoware_tensorrt_bevdet/README.md |  30 ++--
 .../config/cams_param.yaml                    |  93 -----------
 .../autoware/tensorrt_bevdet/bevdet_node.hpp  | 155 ++++++++++++------
 .../src/bevdet_node.cpp                       |  19 +--
 4 files changed, 134 insertions(+), 163 deletions(-)
 delete mode 100644 perception/autoware_tensorrt_bevdet/config/cams_param.yaml

diff --git a/perception/autoware_tensorrt_bevdet/README.md b/perception/autoware_tensorrt_bevdet/README.md
index 096bf676a4672..2ad2e14bed2fe 100644
--- a/perception/autoware_tensorrt_bevdet/README.md
+++ b/perception/autoware_tensorrt_bevdet/README.md
@@ -12,22 +12,26 @@ BEVDet is a BEV perception algorithm based on panoramic cameras. It unifies mult
 
 ### Inputs
 
-| Name                   | Type                            | Description                                                 |
-| ---------------------- | ------------------------------- | ----------------------------------------------------------- |
-| `~/input/pointcloud`   | `sensor_msgs::msg::PointCloud2` | input pointcloud (only used for time alignment and display) |
-| `~/input/topic_img_fl` | `sensor_msgs::msg::Image`       | input front_left camera image                               |
-| `~/input/topic_img_f`  | `sensor_msgs::msg::Image`       | input front camera image                                    |
-| `~/input/topic_img_fr` | `sensor_msgs::msg::Image`       | input front_right camera image                              |
-| `~/input/topic_img_bl` | `sensor_msgs::msg::Image`       | input back_left camera image                                |
-| `~/input/topic_img_b`  | `sensor_msgs::msg::Image`       | input back camera image                                     |
-| `~/input/topic_img_br` | `sensor_msgs::msg::Image`       | input back_right camera image                               |
+| Name                               | Type                           | Description                         |
+| ---------------------------------- | ------------------------------ | ----------------------------------- |
+| `~/input/topic_img_fl`             | `sensor_msgs::msg::Image`      | input front_left camera image       |
+| `~/input/topic_img_f`              | `sensor_msgs::msg::Image`      | input front camera image            |
+| `~/input/topic_img_fr`             | `sensor_msgs::msg::Image`      | input front_right camera image      |
+| `~/input/topic_img_bl`             | `sensor_msgs::msg::Image`      | input back_left camera image        |
+| `~/input/topic_img_b`              | `sensor_msgs::msg::Image`      | input back camera image             |
+| `~/input/topic_img_br`             | `sensor_msgs::msg::Image`      | input back_right camera image       |
+| `~/input/topic_img_fl/camera_info` | `sensor_msgs::msg::CameraInfo` | input front_left camera parameters  |
+| `~/input/topic_img_f/camera_info`  | `sensor_msgs::msg::CameraInfo` | input front camera parameters       |
+| `~/input/topic_img_fr/camera_info` | `sensor_msgs::msg::CameraInfo` | input front_right camera parameters |
+| `~/input/topic_img_bl/camera_info` | `sensor_msgs::msg::CameraInfo` | input back_left camera parameters   |
+| `~/input/topic_img_b/camera_info`  | `sensor_msgs::msg::CameraInfo` | input back camera parameters        |
+| `~/input/topic_img_br/camera_info` | `sensor_msgs::msg::CameraInfo` | input back_right camera parameters  |
 
 ### Outputs
 
-| Name                  | Type                                             | Description                               |
-| --------------------- | ------------------------------------------------ | ----------------------------------------- |
-| `~/output/boxes`      | `autoware_perception_msgs::msg::DetectedObjects` | detected objects                          |
-| `~/output/pointcloud` | `sensor_msgs::msg::PointCloud2`                  | output pointcloud (only used for display) |
+| Name             | Type                                             | Description      |
+| ---------------- | ------------------------------------------------ | ---------------- |
+| `~/output/boxes` | `autoware_perception_msgs::msg::DetectedObjects` | detected objects |
 
 ## Limittation
 
diff --git a/perception/autoware_tensorrt_bevdet/config/cams_param.yaml b/perception/autoware_tensorrt_bevdet/config/cams_param.yaml
deleted file mode 100644
index 6a171b5c81493..0000000000000
--- a/perception/autoware_tensorrt_bevdet/config/cams_param.yaml
+++ /dev/null
@@ -1,93 +0,0 @@
-cams:
-  CAM_BACK:
-    cam_intrinsic:
-      - [809.2209905677063, 0.0, 829.2196003259838]
-      - [0.0, 809.2209905677063, 481.77842384512485]
-      - [0.0, 0.0, 1.0]
-    data_path: ./data/nuscenes/samples/CAM_BACK/n015-2018-08-02-17-16-37+0800__CAM_BACK__1533201470437525.jpg
-    sensor2ego_rotation:
-      [0.5037872666382278, -0.49740249788611096, -0.4941850223835201, 0.5045496097725578]
-    sensor2ego_translation: [0.0283260309358, 0.00345136761476, 1.57910346144]
-    sensor2lidar_rotation:
-      - [-0.9999413173368413, 0.009846645553456103, -0.004517239644395279]
-      - [0.004591280151619832, 0.0075097729192299114, -0.9999612609782784]
-      - [-0.009812340660088988, -0.9999233205011492, -0.007554540934044881]
-    sensor2lidar_translation: [-0.0036954598058400734, -0.9075747504218441, -0.283221870904093]
-  CAM_BACK_LEFT:
-    cam_intrinsic:
-      - [1256.7414812095406, 0.0, 792.1125740759628]
-      - [0.0, 1256.7414812095406, 492.7757465151356]
-      - [0.0, 0.0, 1.0]
-    data_path: ./data/nuscenes/samples/CAM_BACK_LEFT/n015-2018-08-02-17-16-37+0800__CAM_BACK_LEFT__1533201470447423.jpg
-    sensor2ego_rotation:
-      [0.6924185592174665, -0.7031619420114925, -0.11648342771943819, 0.11203317912370753]
-    sensor2ego_translation: [1.03569100218, 0.484795032713, 1.59097014818]
-    sensor2lidar_rotation:
-      - [-0.3170637040675469, 0.019895609309549686, -0.9481955348413992]
-      - [0.9480779188624351, 0.03287371923632269, -0.3163345987226865]
-      - [0.024877044206231273, -0.9992614689428244, -0.029285651056243037]
-    sensor2lidar_translation: [-0.48313021193087025, 0.09925074956760227, -0.24976867779076528]
-  CAM_BACK_RIGHT:
-    cam_intrinsic:
-      - [1259.5137405846733, 0.0, 807.2529053838625]
-      - [0.0, 1259.5137405846733, 501.19579884916527]
-      - [0.0, 0.0, 1.0]
-    data_path: ./data/nuscenes/samples/CAM_BACK_RIGHT/n015-2018-08-02-17-16-37+0800__CAM_BACK_RIGHT__1533201470427893.jpg
-    sensor2ego_rotation:
-      [0.12280980120078765, -0.132400842670559, -0.7004305821388234, 0.690496031265798]
-    sensor2ego_translation: [1.0148780988, -0.480568219723, 1.56239545128]
-    sensor2lidar_rotation:
-      - [-0.3567246414755177, -0.005413898071006906, 0.934193887729864]
-      - [-0.9335307099588551, 0.04018099149456167, -0.3562385457614421]
-      - [-0.035608197481429044, -0.9991777507681946, -0.01938758989490032]
-    sensor2lidar_translation: [0.482312676691663, 0.0791837770804591, -0.27307179836459383]
-  CAM_FRONT:
-    cam_intrinsic:
-      - [1266.417203046554, 0.0, 816.2670197447984]
-      - [0.0, 1266.417203046554, 491.50706579294757]
-      - [0.0, 0.0, 1.0]
-    data_path: ./data/nuscenes/samples/CAM_FRONT/n015-2018-08-02-17-16-37+0800__CAM_FRONT__1533201470412460.jpg
-    sensor2ego_rotation:
-      [0.4998015430569128, -0.5030316162024876, 0.4997798114386805, -0.49737083824542755]
-    sensor2ego_translation: [1.70079118954, 0.0159456324149, 1.51095763913]
-    sensor2lidar_rotation:
-      - [0.9999693728739966, 0.006755601474249302, -0.003951602548949129]
-      - [0.003824557675651031, 0.0187164545279605, 0.9998175168942008]
-      - [0.006828328680530752, -0.9998020084889915, 0.018690044109277416]
-    sensor2lidar_translation: [-0.012715806721303125, 0.7688055822719093, -0.31059455973338856]
-  CAM_FRONT_LEFT:
-    cam_intrinsic:
-      - [1272.5979470598488, 0.0, 826.6154927353808]
-      - [0.0, 1272.5979470598488, 479.75165386361925]
-      - [0.0, 0.0, 1.0]
-    data_path: ./data/nuscenes/samples/CAM_FRONT_LEFT/n015-2018-08-02-17-16-37+0800__CAM_FRONT_LEFT__1533201470404874.jpg
-    sensor2ego_rotation:
-      [0.6757265034669446, -0.6736266522251881, 0.21214015046209478, -0.21122827103904068]
-    sensor2ego_translation: [1.52387798135, 0.494631336551, 1.50932822144]
-    sensor2lidar_rotation:
-      - [0.5726028043564848, 0.0027092489875872503, -0.8198284505998853]
-      - [0.8195566437415105, 0.024067561593338945, 0.5724924979229841]
-      - [0.021282296451183572, -0.9997066632011964, 0.011560770255041255]
-    sensor2lidar_translation: [-0.4917212028847189, 0.5936531098472528, -0.31925386662383204]
-  CAM_FRONT_RIGHT:
-    cam_intrinsic:
-      - [1260.8474446004698, 0.0, 807.968244525554]
-      - [0.0, 1260.8474446004698, 495.3344268742088]
-      - [0.0, 0.0, 1.0]
-    data_path: ./data/nuscenes/samples/CAM_FRONT_RIGHT/n015-2018-08-02-17-16-37+0800__CAM_FRONT_RIGHT__1533201470420339.jpg
-    sensor2ego_rotation:
-      [0.2060347966337182, -0.2026940577919598, 0.6824507824531167, -0.6713610884174485]
-    sensor2ego_translation: [1.5508477543, -0.493404796419, 1.49574800619]
-    sensor2lidar_rotation:
-      - [0.5518728006969844, -0.010452325915577761, 0.8338627949092215]
-      - [-0.8335213795734647, 0.024319671348433384, 0.5519516857293306]
-      - [-0.0260484480307739, -0.9996495898405882, 0.004709128022186309]
-    sensor2lidar_translation: [0.49650027090206095, 0.6174621492795893, -0.32655958795607276]
-ego2global_rotation:
-  [0.9984303573176436, -0.008635865272570774, 0.0025833156025800875, -0.05527720957189669]
-ego2global_translation: [249.89610931430778, 917.5522573162784, 0.0]
-lidar2ego_rotation:
-  [0.7077955119163518, -0.006492242056004365, 0.010646214713995808, -0.7063073142877817]
-lidar2ego_translation: [0.943713, 0.0, 1.84023]
-scene_token: e7ef871f77f44331aefdebc24ec034b7
-timestamp: 1533201470448696
diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
index 115746c31df3d..cf720a26587c3 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
@@ -62,63 +62,87 @@ void box3DToDetectedObjects(
   const std::vector<Box> & boxes, autoware_perception_msgs::msg::DetectedObjects & objects,
   const std::vector<std::string> & class_names, float score_thre, const bool has_twist);
 
+// Get the rotation and translation from a geometry_msgs::msg::TransformStamped
 void getTransform(
   const geometry_msgs::msg::TransformStamped & transform, Eigen::Quaternion<float> & rot,
   Eigen::Translation3f & translation);
 
+// Get the camera intrinsics from a sensor_msgs::msg::CameraInfo
 void getCameraIntrinsics(
   const sensor_msgs::msg::CameraInfo::SharedPtr msg, Eigen::Matrix3f & intrinsics);
 
 class TRTBEVDetNode : public rclcpp::Node
 {
+  /**
+   * @class TRTBEVDetNode
+   * @brief This class represents a node for performing object detection using TensorRT on BEV
+   * (Bird's Eye View) images.
+   */
 private:
-  size_t img_N_;
-  int img_w_;
-  int img_h_;
-
-  std::string model_config_;
-
-  std::string onnx_file_;
-  std::string engine_file_;
-
-  YAML::Node camconfig_;
-
-  std::vector<std::string> imgs_name_;
-  std::vector<std::string> class_names_;
-
-  camsData sampleData_;
-  std::shared_ptr<BEVDet> bevdet_;
-
-  uchar * imgs_dev_ = nullptr;
-  float score_thre_;
-
-  rclcpp::Publisher<autoware_perception_msgs::msg::DetectedObjects>::SharedPtr pub_boxes_;
-  // Subscribers of camera info for each camera, no need to synchonrize
-  rclcpp::Subscription<sensor_msgs::msg::CameraInfo>::SharedPtr sub_f_caminfo_;
-  rclcpp::Subscription<sensor_msgs::msg::CameraInfo>::SharedPtr sub_fl_caminfo_;
-  rclcpp::Subscription<sensor_msgs::msg::CameraInfo>::SharedPtr sub_fr_caminfo_;
-  rclcpp::Subscription<sensor_msgs::msg::CameraInfo>::SharedPtr sub_b_caminfo_;
-  rclcpp::Subscription<sensor_msgs::msg::CameraInfo>::SharedPtr sub_bl_caminfo_;
-  rclcpp::Subscription<sensor_msgs::msg::CameraInfo>::SharedPtr sub_br_caminfo_;
-  std::vector<bool> caminfo_received_;
-  bool camera_info_received_flag_ = false;
+  size_t img_N_;  ///< Number of images
+  int img_w_;     ///< Width of the images
+  int img_h_;     ///< Height of the images
+
+  std::string model_config_;  ///< Path to the model configuration file
+
+  std::string onnx_file_;    ///< Path to the ONNX file
+  std::string engine_file_;  ///< Path to the TensorRT engine file
+
+  std::vector<std::string> imgs_name_;    ///< Names of the images
+  std::vector<std::string> class_names_;  ///< Names of the object classes
+
+  camsData sampleData_;             ///< Sample data for camera parameters
+  std::shared_ptr<BEVDet> bevdet_;  ///< Object for performing object detection
+
+  uchar * imgs_dev_ = nullptr;  ///< Device pointer for storing the images
+  float score_thre_;            ///< Score threshold for object detection
+
+  rclcpp::Publisher<autoware_perception_msgs::msg::DetectedObjects>::SharedPtr
+    pub_boxes_;  ///< Publisher for publishing the detected objects
+
+  // Subscribers of camera info for each camera, no need to synchronize
+  rclcpp::Subscription<sensor_msgs::msg::CameraInfo>::SharedPtr
+    sub_f_caminfo_;  ///< Subscriber for front camera info
+  rclcpp::Subscription<sensor_msgs::msg::CameraInfo>::SharedPtr
+    sub_fl_caminfo_;  ///< Subscriber for front-left camera info
+  rclcpp::Subscription<sensor_msgs::msg::CameraInfo>::SharedPtr
+    sub_fr_caminfo_;  ///< Subscriber for front-right camera info
+  rclcpp::Subscription<sensor_msgs::msg::CameraInfo>::SharedPtr
+    sub_b_caminfo_;  ///< Subscriber for back camera info
+  rclcpp::Subscription<sensor_msgs::msg::CameraInfo>::SharedPtr
+    sub_bl_caminfo_;  ///< Subscriber for back-left camera info
+  rclcpp::Subscription<sensor_msgs::msg::CameraInfo>::SharedPtr
+    sub_br_caminfo_;  ///< Subscriber for back-right camera info
+  std::vector<bool>
+    caminfo_received_;  ///< Flag indicating if camera info has been received for each camera
+  bool camera_info_received_flag_ =
+    false;  ///< Flag indicating if camera info has been received for all cameras
 
   // tf listener
-  std::shared_ptr<tf2_ros::TransformListener> tf_listener_{nullptr};
-  std::unique_ptr<tf2_ros::Buffer> tf_buffer_;
+  std::shared_ptr<tf2_ros::TransformListener> tf_listener_{
+    nullptr};                                   ///< TF listener for transforming coordinates
+  std::unique_ptr<tf2_ros::Buffer> tf_buffer_;  ///< Buffer for storing TF transforms
 
   // Camera parameters;
-  std::vector<Eigen::Matrix3f> cams_intrin;
-  std::vector<Eigen::Quaternion<float>> cams2ego_rot;
-  std::vector<Eigen::Translation3f> cams2ego_trans;
-
-  // Subscribers of images for each camera, synchonrized
-  message_filters::Subscriber<sensor_msgs::msg::Image> sub_f_img_;
-  message_filters::Subscriber<sensor_msgs::msg::Image> sub_fl_img_;
-  message_filters::Subscriber<sensor_msgs::msg::Image> sub_fr_img_;
-  message_filters::Subscriber<sensor_msgs::msg::Image> sub_b_img_;
-  message_filters::Subscriber<sensor_msgs::msg::Image> sub_bl_img_;
-  message_filters::Subscriber<sensor_msgs::msg::Image> sub_br_img_;
+  std::vector<Eigen::Matrix3f> cams_intrin;  ///< Intrinsic camera parameters for each camera
+  std::vector<Eigen::Quaternion<float>>
+    cams2ego_rot;  ///< Rotation from camera frame to ego frame for each camera
+  std::vector<Eigen::Translation3f>
+    cams2ego_trans;  ///< Translation from camera frame to ego frame for each camera
+
+  // Subscribers of images for each camera, synchronized
+  message_filters::Subscriber<sensor_msgs::msg::Image>
+    sub_f_img_;  ///< Subscriber for front camera image
+  message_filters::Subscriber<sensor_msgs::msg::Image>
+    sub_fl_img_;  ///< Subscriber for front-left camera image
+  message_filters::Subscriber<sensor_msgs::msg::Image>
+    sub_fr_img_;  ///< Subscriber for front-right camera image
+  message_filters::Subscriber<sensor_msgs::msg::Image>
+    sub_b_img_;  ///< Subscriber for back camera image
+  message_filters::Subscriber<sensor_msgs::msg::Image>
+    sub_bl_img_;  ///< Subscriber for back-left camera image
+  message_filters::Subscriber<sensor_msgs::msg::Image>
+    sub_br_img_;  ///< Subscriber for back-right camera image
 
   typedef message_filters::sync_policies::ApproximateTime<
     sensor_msgs::msg::Image, sensor_msgs::msg::Image, sensor_msgs::msg::Image,
@@ -126,20 +150,53 @@ class TRTBEVDetNode : public rclcpp::Node
     MySyncPolicy;
 
   typedef message_filters::Synchronizer<MySyncPolicy> Sync;
-  std::shared_ptr<Sync> sync_;
+  std::shared_ptr<Sync> sync_;  ///< Synchronizer for synchronizing image callbacks
 
   // Timer for checking initialization
-  rclcpp::TimerBase::SharedPtr timer_;
-  void initParameters();
+  rclcpp::TimerBase::SharedPtr timer_;  ///< Timer for checking initialization
+
+  /**
+   * @brief Starts the subscription to camera info topics for each camera.
+   */
   void startCameraInfoSubscription();
+
+  /**
+   * @brief Checks if the node has been initialized properly.
+   */
   void checkInitialization();
+
+  /**
+   * @brief Initializes the object detection model.
+   */
   void initModel();
+
+  /**
+   * @brief Starts the subscription to image topics for each camera.
+   */
   void startImageSubscription();
 
 public:
+  /**
+   * @brief Constructor for TRTBEVDetNode.
+   * @param node_name The name of the node.
+   * @param options The options for the node.
+   */
   TRTBEVDetNode(const std::string & node_name, const rclcpp::NodeOptions & options);
+
+  /**
+   * @brief Destructor for TRTBEVDetNode.
+   */
   ~TRTBEVDetNode();
 
+  /**
+   * @brief Callback function for synchronized image messages.
+   * @param msg_fl_img The front-left camera image message.
+   * @param msg_f_img The front camera image message.
+   * @param msg_fr_img The front-right camera image message.
+   * @param msg_bl_img The back-left camera image message.
+   * @param msg_b_img The back camera image message.
+   * @param msg_br_img The back-right camera image message.
+   */
   void callback(
     const sensor_msgs::msg::Image::ConstSharedPtr & msg_fl_img,
     const sensor_msgs::msg::Image::ConstSharedPtr & msg_f_img,
@@ -148,6 +205,12 @@ class TRTBEVDetNode : public rclcpp::Node
     const sensor_msgs::msg::Image::ConstSharedPtr & msg_b_img,
     const sensor_msgs::msg::Image::ConstSharedPtr & msg_br_img);
 
+  /**
+   * @brief Callback function for camera info messages. This function also reads from TF to get the
+   * transformation from the camera frame to the ego frame.
+   * @param idx The index of the camera.
+   * @param msg The camera info message.
+   */
   void camera_info_callback(int idx, const sensor_msgs::msg::CameraInfo::SharedPtr msg);
 };
 
diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
index a4b3bad296955..768911004629f 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
@@ -117,7 +117,7 @@ void getCameraIntrinsics(
 TRTBEVDetNode::TRTBEVDetNode(
   const std::string & node_name, const rclcpp::NodeOptions & node_options)
 : rclcpp::Node(node_name, node_options)
-{
+{  // Only start camera info subscription and tf listener at the beginning
   img_N_ = this->declare_parameter<int>("data_params.N", 6);     // camera num 6
   img_w_ = this->declare_parameter<int>("data_params.W", 1600);  // W: 1600
   img_h_ = this->declare_parameter<int>("data_params.H", 900);   // H: 900
@@ -127,12 +127,12 @@ TRTBEVDetNode::TRTBEVDetNode(
   cams2ego_rot = std::vector<Eigen::Quaternion<float>>(img_N_);
   cams2ego_trans = std::vector<Eigen::Translation3f>(img_N_);
 
-  tf_buffer_ =
-      std::make_unique<tf2_ros::Buffer>(this->get_clock());
-  tf_listener_ =
-      std::make_shared<tf2_ros::TransformListener>(*tf_buffer_);
+  tf_buffer_ = std::make_unique<tf2_ros::Buffer>(this->get_clock());
+  tf_listener_ = std::make_shared<tf2_ros::TransformListener>(*tf_buffer_);
 
   startCameraInfoSubscription();
+
+  // Wait for camera info and tf transform initialization
   timer_ = this->create_wall_timer(
     std::chrono::milliseconds(100), std::bind(&TRTBEVDetNode::checkInitialization, this));
 }
@@ -146,15 +146,12 @@ void TRTBEVDetNode::initModel()
   onnx_file_ = this->declare_parameter("onnx_file", "bevdet_one_lt_d.onnx");
   engine_file_ = this->declare_parameter("engine_file", "bevdet_one_lt_d.engine");
 
-  // camconfig_ = YAML::LoadFile(this->declare_parameter("cam_config", "cams_param.yaml"));
-
   imgs_name_ = this->declare_parameter<std::vector<std::string>>("data_params.cams");
   class_names_ =
     this->declare_parameter<std::vector<std::string>>("post_process_params.class_names");
 
   RCLCPP_INFO_STREAM(this->get_logger(), "Successful load config!");
 
-  // sampleData_.param = camParams(camconfig_, img_N_, imgs_name_);
   sampleData_.param = camParams(cams_intrin, cams2ego_rot, cams2ego_trans);
 
   RCLCPP_INFO_STREAM(this->get_logger(), "Successful load image params!");
@@ -181,8 +178,7 @@ void TRTBEVDetNode::checkInitialization()
     startImageSubscription();
     timer_->cancel();
     timer_.reset();
-  }
-  else {
+  } else {
     RCLCPP_INFO_THROTTLE(
       this->get_logger(), *this->get_clock(), 5000,
       "Waiting for Camera Info and TF Transform Initialization...");
@@ -305,7 +301,8 @@ void TRTBEVDetNode::callback(
 
 void TRTBEVDetNode::camera_info_callback(int idx, const sensor_msgs::msg::CameraInfo::SharedPtr msg)
 {
-  if (caminfo_received_[idx]) return; // already received;  not expected to modify because of we init the model only once
+  if (caminfo_received_[idx])
+    return;  // already received;  not expected to modify because of we init the model only once
 
   Eigen::Matrix3f intrinsics;
   getCameraIntrinsics(msg, intrinsics);

From e03a881d56d1f00da50136c479188ee43ea24bb1 Mon Sep 17 00:00:00 2001
From: YuxuanLiuTier4Desktop <619684051@qq.com>
Date: Mon, 26 Aug 2024 10:54:49 +0900
Subject: [PATCH 51/85] try clean up spelling

Signed-off-by: YuxuanLiuTier4Desktop <619684051@qq.com>
---
 perception/autoware_tensorrt_bevdet/README.md |  4 +--
 .../autoware/tensorrt_bevdet/bevdet_node.hpp  |  2 ++
 .../include/autoware/tensorrt_bevdet/data.hpp |  2 ++
 .../launch/tensorrt_bevdet.launch.xml         | 26 ++++++++++---------
 .../autoware_tensorrt_bevdet/src/bevdet.cpp   |  3 +++
 .../src/bevdet_node.cpp                       |  2 ++
 6 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/README.md b/perception/autoware_tensorrt_bevdet/README.md
index 2ad2e14bed2fe..30c5295ff3eb9 100644
--- a/perception/autoware_tensorrt_bevdet/README.md
+++ b/perception/autoware_tensorrt_bevdet/README.md
@@ -1,4 +1,4 @@
-# tensorrt_bevdet
+# tensorrt_bevdet <!-- cspell: ignore bevdet -->
 
 ## Purpose
 
@@ -33,7 +33,7 @@ BEVDet is a BEV perception algorithm based on panoramic cameras. It unifies mult
 | ---------------- | ------------------------------------------------ | ---------------- |
 | `~/output/boxes` | `autoware_perception_msgs::msg::DetectedObjects` | detected objects |
 
-## Limittation
+## Limitation
 
 The model is trained on open-source dataset `NuScenes` and has poor generalization on its own dataset, If you want to use this model to infer your data, you need to retrain it.
 
diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
index cf720a26587c3..5fa8515716a8e 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
@@ -11,6 +11,8 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
+
+// cspell:ignore BEVDET, thre, TRTBEV, bevdet, caminfo, intrin
 #ifndef AUTOWARE__TENSORRT_BEVDET__BEVDET_NODE_HPP_
 #define AUTOWARE__TENSORRT_BEVDET__BEVDET_NODE_HPP_
 
diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/data.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/data.hpp
index 25c6708f91583..4f045a1551435 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/data.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/data.hpp
@@ -11,6 +11,8 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
+
+// cspell:ignore BEVDET, intrin, Quater, BEVDET
 #ifndef AUTOWARE__TENSORRT_BEVDET__DATA_HPP_
 #define AUTOWARE__TENSORRT_BEVDET__DATA_HPP_
 
diff --git a/perception/autoware_tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml b/perception/autoware_tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml
index 601b71cfefbe3..337cfdeb8c79a 100644
--- a/perception/autoware_tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml
+++ b/perception/autoware_tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml
@@ -1,20 +1,22 @@
 <?xml version="1.0"?>
 <launch>
+  <!-- cspell:ignore nuscenes, bevdet dlongterm-->
+
   <!-- input topic name -->
-  <arg name="input/img_fl" default="/nuscenes/CAM_FRONT_LEFT/image"/>
-  <arg name="input/img_f" default="/nuscenes/CAM_FRONT/image"/>
-  <arg name="input/img_fr" default="/nuscenes/CAM_FRONT_RIGHT/image"/>
-  <arg name="input/img_bl" default="/nuscenes/CAM_BACK_LEFT/image"/>
-  <arg name="input/img_b" default="/nuscenes/CAM_BACK/image"/>
-  <arg name="input/img_br" default="/nuscenes/CAM_BACK_RIGHT/image"/>
+  <arg name="input/img_fl" default="/CAM_FRONT_LEFT/image"/>
+  <arg name="input/img_f" default="/CAM_FRONT/image"/>
+  <arg name="input/img_fr" default="/CAM_FRONT_RIGHT/image"/>
+  <arg name="input/img_bl" default="/CAM_BACK_LEFT/image"/>
+  <arg name="input/img_b" default="/CAM_BACK/image"/>
+  <arg name="input/img_br" default="/CAM_BACK_RIGHT/image"/>
   <arg name="output/boxes" default="boxes"/>
 
-  <arg name="input/img_fl/camera_info" default="/nuscenes/CAM_FRONT_LEFT/camera_info"/>
-  <arg name="input/img_f/camera_info" default="/nuscenes/CAM_FRONT/camera_info"/>
-  <arg name="input/img_fr/camera_info" default="/nuscenes/CAM_FRONT_RIGHT/camera_info"/>
-  <arg name="input/img_bl/camera_info" default="/nuscenes/CAM_BACK_LEFT/camera_info"/>
-  <arg name="input/img_b/camera_info" default="/nuscenes/CAM_BACK/camera_info"/>
-  <arg name="input/img_br/camera_info" default="/nuscenes/CAM_BACK_RIGHT/camera_info"/>
+  <arg name="input/img_fl/camera_info" default="/CAM_FRONT_LEFT/camera_info"/>
+  <arg name="input/img_f/camera_info" default="/CAM_FRONT/camera_info"/>
+  <arg name="input/img_fr/camera_info" default="/CAM_FRONT_RIGHT/camera_info"/>
+  <arg name="input/img_bl/camera_info" default="/CAM_BACK_LEFT/camera_info"/>
+  <arg name="input/img_b/camera_info" default="/CAM_BACK/camera_info"/>
+  <arg name="input/img_br/camera_info" default="/CAM_BACK_RIGHT/camera_info"/>
 
   <arg name="data_path" default="$(env HOME)/autoware_data/autoware_tensorrt_bevdet"/>
   <arg name="onnx_file" default="$(var data_path)/bevdet_one_lt_d.onnx"/>
diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
index c1ff893cc4f22..69b3b5eebe025 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
@@ -11,6 +11,9 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
+
+// cspell:ignore BEVDET, thre, TRTBEV, bevdet, caminfo, intrin, Ncams, bevfeat, bevpool, maxnum, zgrid
+// cspell:ignore gridbev, egobev, adjgird, currgrid
 #include "autoware/tensorrt_bevdet/bevdet.hpp"
 
 #include "autoware/tensorrt_bevdet/alignbev_plugin.hpp"
diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
index 768911004629f..c55cc7af6343c 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
@@ -11,6 +11,8 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
+
+// cspell:ignore BEVDET, thre, TRTBEV, bevdet, caminfo, intrin, Ncams, bevfeat, dlongterm, RGBHWC, BGRCHW
 #include "autoware/tensorrt_bevdet/bevdet_node.hpp"
 
 #include "autoware/tensorrt_bevdet/preprocess.hpp"

From f8cf04e35931354c2187f5f39a8d1a33de8a53ef Mon Sep 17 00:00:00 2001
From: YuxuanLiuTier4Desktop <619684051@qq.com>
Date: Mon, 26 Aug 2024 10:58:59 +0900
Subject: [PATCH 52/85] try fixing cspell

Signed-off-by: YuxuanLiuTier4Desktop <619684051@qq.com>
---
 .../autoware_tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml  | 2 +-
 perception/autoware_tensorrt_bevdet/src/bevdet.cpp              | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml b/perception/autoware_tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml
index 337cfdeb8c79a..4dd254261ad5a 100644
--- a/perception/autoware_tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml
+++ b/perception/autoware_tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml
@@ -1,6 +1,6 @@
 <?xml version="1.0"?>
 <launch>
-  <!-- cspell:ignore nuscenes, bevdet dlongterm-->
+  <!-- cspell:ignore nuscenes, bevdet, dlongterm -->
 
   <!-- input topic name -->
   <arg name="input/img_fl" default="/CAM_FRONT_LEFT/image"/>
diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
index 69b3b5eebe025..4481f0fef9b94 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
@@ -13,7 +13,7 @@
 // limitations under the License.
 
 // cspell:ignore BEVDET, thre, TRTBEV, bevdet, caminfo, intrin, Ncams, bevfeat, bevpool, maxnum, zgrid
-// cspell:ignore gridbev, egobev, adjgird, currgrid
+// cspell:ignore gridbev, egobev, adjgrid, currgrid
 #include "autoware/tensorrt_bevdet/bevdet.hpp"
 
 #include "autoware/tensorrt_bevdet/alignbev_plugin.hpp"

From 56adbeaa6481324833a26a23ca07d7a17a5279cd Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 27 Aug 2024 12:51:04 +0000
Subject: [PATCH 53/85] style(pre-commit): autofix

---
 perception/autoware_tensorrt_bevdet/src/bevdet.cpp      | 4 ++--
 perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
index 4481f0fef9b94..ae877699b2fa1 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
@@ -12,8 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-// cspell:ignore BEVDET, thre, TRTBEV, bevdet, caminfo, intrin, Ncams, bevfeat, bevpool, maxnum, zgrid
-// cspell:ignore gridbev, egobev, adjgrid, currgrid
+// cspell:ignore BEVDET, thre, TRTBEV, bevdet, caminfo, intrin, Ncams, bevfeat, bevpool, maxnum,
+// zgrid cspell:ignore gridbev, egobev, adjgrid, currgrid
 #include "autoware/tensorrt_bevdet/bevdet.hpp"
 
 #include "autoware/tensorrt_bevdet/alignbev_plugin.hpp"
diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
index c55cc7af6343c..26c1bab6b914d 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
@@ -12,7 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-// cspell:ignore BEVDET, thre, TRTBEV, bevdet, caminfo, intrin, Ncams, bevfeat, dlongterm, RGBHWC, BGRCHW
+// cspell:ignore BEVDET, thre, TRTBEV, bevdet, caminfo, intrin, Ncams, bevfeat, dlongterm, RGBHWC,
+// BGRCHW
 #include "autoware/tensorrt_bevdet/bevdet_node.hpp"
 
 #include "autoware/tensorrt_bevdet/preprocess.hpp"

From 32a5cfa5d129e730076cbbd5347d09b72192a86c Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Wed, 28 Aug 2024 14:14:19 +0800
Subject: [PATCH 54/85] fix copyright

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 .../include/autoware/tensorrt_bevdet/bevdet.hpp                 | 2 +-
 .../include/autoware/tensorrt_bevdet/bevdet_node.hpp            | 2 +-
 .../include/autoware/tensorrt_bevdet/common.hpp                 | 2 +-
 .../include/autoware/tensorrt_bevdet/cpu_jpegdecoder.hpp        | 2 +-
 .../include/autoware/tensorrt_bevdet/data.hpp                   | 2 +-
 .../include/autoware/tensorrt_bevdet/iou3d_nms.hpp              | 2 +-
 .../include/autoware/tensorrt_bevdet/nvjpegdecoder.hpp          | 2 +-
 .../include/autoware/tensorrt_bevdet/postprocess.hpp            | 2 +-
 .../include/autoware/tensorrt_bevdet/preprocess.hpp             | 2 +-
 perception/autoware_tensorrt_bevdet/src/bevdet.cpp              | 2 +-
 perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp         | 2 +-
 perception/autoware_tensorrt_bevdet/src/cpu_jpegdecoder.cpp     | 2 +-
 perception/autoware_tensorrt_bevdet/src/data.cpp                | 2 +-
 perception/autoware_tensorrt_bevdet/src/iou3d_nms.cu            | 2 +-
 perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp       | 2 +-
 perception/autoware_tensorrt_bevdet/src/postprocess.cu          | 2 +-
 perception/autoware_tensorrt_bevdet/src/preprocess.cu           | 2 +-
 17 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp
index bd0ae58b3a4b7..3276901ff4fd2 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp
@@ -1,4 +1,4 @@
-// Copyright 2024 TIER IV, Inc.
+// Copyright 2024 AutoCore, Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
index 5fa8515716a8e..7281570eafcac 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
@@ -1,4 +1,4 @@
-// Copyright 2024 TIER IV, Inc.
+// Copyright 2024 AutoCore, Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/common.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/common.hpp
index 966f52e3ce5be..a1a745d9e604f 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/common.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/common.hpp
@@ -1,4 +1,4 @@
-// Copyright 2024 TIER IV, Inc.
+// Copyright 2024 AutoCore, Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/cpu_jpegdecoder.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/cpu_jpegdecoder.hpp
index cfc5248856e5b..94a301976fe6a 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/cpu_jpegdecoder.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/cpu_jpegdecoder.hpp
@@ -1,4 +1,4 @@
-// Copyright 2024 TIER IV, Inc.
+// Copyright 2024 AutoCore, Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/data.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/data.hpp
index 4f045a1551435..153795b9d0f16 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/data.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/data.hpp
@@ -1,4 +1,4 @@
-// Copyright 2024 TIER IV, Inc.
+// Copyright 2024 AutoCore, Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/iou3d_nms.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/iou3d_nms.hpp
index 5a95be4465317..609b36717feab 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/iou3d_nms.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/iou3d_nms.hpp
@@ -1,4 +1,4 @@
-// Copyright 2024 TIER IV, Inc.
+// Copyright 2024 AutoCore, Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/nvjpegdecoder.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/nvjpegdecoder.hpp
index b3c91f20bd1d3..dad94eb9c4af2 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/nvjpegdecoder.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/nvjpegdecoder.hpp
@@ -1,4 +1,4 @@
-// Copyright 2024 TIER IV, Inc.
+// Copyright 2024 AutoCore, Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/postprocess.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/postprocess.hpp
index c7d0293f34888..35a2fe0d44829 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/postprocess.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/postprocess.hpp
@@ -1,4 +1,4 @@
-// Copyright 2024 TIER IV, Inc.
+// Copyright 2024 AutoCore, Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/preprocess.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/preprocess.hpp
index e5aaedb972e5f..1c09fc925d8b5 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/preprocess.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/preprocess.hpp
@@ -1,4 +1,4 @@
-// Copyright 2024 TIER IV, Inc.
+// Copyright 2024 AutoCore, Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
index ae877699b2fa1..61b1ee365fcac 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
@@ -1,4 +1,4 @@
-// Copyright 2024 TIER IV, Inc.
+// Copyright 2024 AutoCore, Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
index 26c1bab6b914d..bb3bb9143d638 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
@@ -1,4 +1,4 @@
-// Copyright 2024 TIER IV, Inc.
+// Copyright 2024 AutoCore, Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
diff --git a/perception/autoware_tensorrt_bevdet/src/cpu_jpegdecoder.cpp b/perception/autoware_tensorrt_bevdet/src/cpu_jpegdecoder.cpp
index c00296bc41e34..1afe91becfb95 100644
--- a/perception/autoware_tensorrt_bevdet/src/cpu_jpegdecoder.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/cpu_jpegdecoder.cpp
@@ -1,4 +1,4 @@
-// Copyright 2024 TIER IV, Inc.
+// Copyright 2024 AutoCore, Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
diff --git a/perception/autoware_tensorrt_bevdet/src/data.cpp b/perception/autoware_tensorrt_bevdet/src/data.cpp
index 9171dc7b65b84..6dbb02015b5a2 100644
--- a/perception/autoware_tensorrt_bevdet/src/data.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/data.cpp
@@ -1,4 +1,4 @@
-// Copyright 2024 TIER IV, Inc.
+// Copyright 2024 AutoCore, Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
diff --git a/perception/autoware_tensorrt_bevdet/src/iou3d_nms.cu b/perception/autoware_tensorrt_bevdet/src/iou3d_nms.cu
index fda6e0fce93f8..fe68a6f03bb0f 100644
--- a/perception/autoware_tensorrt_bevdet/src/iou3d_nms.cu
+++ b/perception/autoware_tensorrt_bevdet/src/iou3d_nms.cu
@@ -1,4 +1,4 @@
-// Copyright 2024 TIER IV, Inc.
+// Copyright 2024 AutoCore, Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
diff --git a/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp b/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp
index 19a7c168b6201..33853437e96fe 100644
--- a/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp
@@ -1,4 +1,4 @@
-// Copyright 2024 TIER IV, Inc.
+// Copyright 2024 AutoCore, Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
diff --git a/perception/autoware_tensorrt_bevdet/src/postprocess.cu b/perception/autoware_tensorrt_bevdet/src/postprocess.cu
index 009c2bf279c68..c7ac66a3236e6 100644
--- a/perception/autoware_tensorrt_bevdet/src/postprocess.cu
+++ b/perception/autoware_tensorrt_bevdet/src/postprocess.cu
@@ -1,4 +1,4 @@
-// Copyright 2024 TIER IV, Inc.
+// Copyright 2024 AutoCore, Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
diff --git a/perception/autoware_tensorrt_bevdet/src/preprocess.cu b/perception/autoware_tensorrt_bevdet/src/preprocess.cu
index 3b48eb9a809e7..2a1bdc426a42f 100644
--- a/perception/autoware_tensorrt_bevdet/src/preprocess.cu
+++ b/perception/autoware_tensorrt_bevdet/src/preprocess.cu
@@ -1,4 +1,4 @@
-// Copyright 2024 TIER IV, Inc.
+// Copyright 2024 AutoCore, Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.

From aee264f763740660fa9521a025d0a11a1a89d11d Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Thu, 29 Aug 2024 16:43:21 +0800
Subject: [PATCH 55/85] delete specific onnx and engine file path in
 tier4_perception_launch

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 .../detector/camera_bev_detector.launch.xml    | 18 ++++++++++++------
 .../config/bevdet.param.yaml                   |  3 +++
 .../launch/tensorrt_bevdet.launch.xml          | 10 ++++------
 .../src/bevdet_node.cpp                        |  4 ++--
 4 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/launch/tier4_perception_launch/launch/object_recognition/detection/detector/camera_bev_detector.launch.xml b/launch/tier4_perception_launch/launch/object_recognition/detection/detector/camera_bev_detector.launch.xml
index bad876765051d..a8b1f3f63985d 100644
--- a/launch/tier4_perception_launch/launch/object_recognition/detection/detector/camera_bev_detector.launch.xml
+++ b/launch/tier4_perception_launch/launch/object_recognition/detection/detector/camera_bev_detector.launch.xml
@@ -20,23 +20,29 @@
   <arg name="number_of_cameras"/>
 
   <!-- Model parameters -->
-  <arg name="data_path" default="$(env HOME)/autoware_data/autoware_tensorrt_bevdet" description="packages data and artifacts directory path"/>
-  <arg name="onnx_file" default="$(var data_path)/bevdet_one_lt_d.onnx" description="onnx model file path"/>
-  <arg name="engine_file" default="$(var data_path)/bevdet_one_lt_d.engine" description="engine model file path"/>
+  <arg name="data_path" default="$(env HOME)/autoware_data" description="packages data and artifacts directory path"/>
+  <arg name="bevdet_model_name" default="bevdet_one_lt_d"/>
+  <arg name="bevdet_model_path" default="$(var data_path)/tensorrt_bevdet"/>
   <!-- BEVDet -->
   <group>
     <push-ros-namespace namespace="bevdet"/>
     <include file="$(find-pkg-share autoware_tensorrt_bevdet)/launch/tensorrt_bevdet.launch.xml">
       <arg name="input/img_fl" value="$(var input/camera0/image)"/>
+      <arg name="input/img_fl/camera_info" value="$(var input/camera0/info)"/>
       <arg name="input/img_f" value="$(var input/camera1/image)"/>
+      <arg name="input/img_f/camera_info" value="$(var input/camera1/info)"/>
       <arg name="input/img_fr" value="$(var input/camera2/image)"/>
+      <arg name="input/img_fr/camera_info" value="$(var input/camera2/info)"/>
       <arg name="input/img_bl" value="$(var input/camera3/image)"/>
+      <arg name="input/img_bl/camera_info" value="$(var input/camera3/info)"/>
       <arg name="input/img_b" value="$(var input/camera4/image)"/>
+      <arg name="input/img_b/camera_info" value="$(var input/camera4/info)"/>
       <arg name="input/img_br" value="$(var input/camera5/image)"/>
+      <arg name="input/img_br/camera_info" value="$(var input/camera5/info)"/>
       <arg name="output/boxes" value="boxes"/>
-      <arg name="data_path" value="$(var data_path)/"/>
-      <arg name="onnx_file" value="$(var onnx_file)"/>
-      <arg name="engine_file" value="$(var engine_file)"/>
+      <arg name="data_path" value="$(var data_path)"/>
+      <arg name="model_name" value="$(var bevdet_model_name)"/>
+      <arg name="model_path" value="$(var bevdet_model_path)"/>
     </include>
   </group>
 </launch>
diff --git a/perception/autoware_tensorrt_bevdet/config/bevdet.param.yaml b/perception/autoware_tensorrt_bevdet/config/bevdet.param.yaml
index 193c7ad272236..e7140328a5e6e 100644
--- a/perception/autoware_tensorrt_bevdet/config/bevdet.param.yaml
+++ b/perception/autoware_tensorrt_bevdet/config/bevdet.param.yaml
@@ -1,5 +1,8 @@
 /**:
     ros__parameters:
+      # weight files
+      onnx_path: "$(var model_path)/$(var model_name).onnx"
+      engine_path: "$(var model_path)/$(var model_name).engine"
       data_params:
         N: 6  # camera num
         H: 900 # image height
diff --git a/perception/autoware_tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml b/perception/autoware_tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml
index 4dd254261ad5a..0017ec5748cc8 100644
--- a/perception/autoware_tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml
+++ b/perception/autoware_tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml
@@ -18,9 +18,9 @@
   <arg name="input/img_b/camera_info" default="/CAM_BACK/camera_info"/>
   <arg name="input/img_br/camera_info" default="/CAM_BACK_RIGHT/camera_info"/>
 
-  <arg name="data_path" default="$(env HOME)/autoware_data/autoware_tensorrt_bevdet"/>
-  <arg name="onnx_file" default="$(var data_path)/bevdet_one_lt_d.onnx"/>
-  <arg name="engine_file" default="$(var data_path)/bevdet_one_lt_d.engine"/>
+  <arg name="data_path" default="$(env HOME)/autoware_data"/>
+  <arg name="model_name" default="bevdet_one_lt_d"/>
+  <arg name="model_path" default="$(var data_path)/tensorrt_bevdet"/>
   <arg name="model_config" default="$(find-pkg-share autoware_tensorrt_bevdet)/config/bevdet_r50_4dlongterm_depth.yaml"/>
   <arg name="param_file" default="$(find-pkg-share autoware_tensorrt_bevdet)/config/bevdet.param.yaml"/>
 
@@ -43,9 +43,7 @@
 
     <remap from="~/output/boxes" to="$(var output/boxes)"/>
 
-    <param name="onnx_file" value="$(var onnx_file)"/>
-    <param name="engine_file" value="$(var engine_file)"/>
     <param name="model_config" value="$(var model_config)"/>
-    <param from="$(var param_file)"/>
+    <param from="$(var param_file)" allow_substs="true"/>
   </node>
 </launch>
diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
index bb3bb9143d638..3841658b97767 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
@@ -146,8 +146,8 @@ void TRTBEVDetNode::initModel()
 
   model_config_ = this->declare_parameter("model_config", "bevdet_r50_4dlongterm_depth.yaml");
 
-  onnx_file_ = this->declare_parameter("onnx_file", "bevdet_one_lt_d.onnx");
-  engine_file_ = this->declare_parameter("engine_file", "bevdet_one_lt_d.engine");
+  onnx_file_ = this->declare_parameter<std::string>("onnx_path", "bevdet_one_lt_d.onnx");
+  engine_file_ = this->declare_parameter<std::string>("engine_path", "bevdet_one_lt_d.engine");
 
   imgs_name_ = this->declare_parameter<std::vector<std::string>>("data_params.cams");
   class_names_ =

From 5c071fea3dded7752512d2f6069714ebe40492c2 Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Thu, 29 Aug 2024 18:03:50 +0800
Subject: [PATCH 56/85] fix cppcheck-differential

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 perception/autoware_tensorrt_bevdet/src/bevdet.cpp        | 1 -
 perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp | 2 --
 2 files changed, 3 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
index 61b1ee365fcac..ed84bd6170733 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
@@ -446,7 +446,6 @@ int BEVDet::InitEngine(const std::string & engine_file)
 int BEVDet::DeserializeTRTEngine(
   const std::string & engine_file, nvinfer1::ICudaEngine ** engine_ptr)
 {
-  int verbosity = static_cast<int>(nvinfer1::ILogger::Severity::kWARNING);
   std::stringstream engine_stream;
   engine_stream.seekg(0, engine_stream.beg);
 
diff --git a/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp b/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp
index 33853437e96fe..3156e362dabe8 100644
--- a/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp
@@ -220,7 +220,6 @@ int nvjpegDecoder::decode(const std::vector<std::vector<char>> & files_data, uch
   }
   if (prepare_buffers(files_data, widths, heights, iout, isz, share_param)) return EXIT_FAILURE;
 
-  double total_time = 0;
   double times[6];
 
   auto decode_start = high_resolution_clock::now();
@@ -232,7 +231,6 @@ int nvjpegDecoder::decode(const std::vector<std::vector<char>> & files_data, uch
   }
   for (size_t i = 0; i < files_data.size(); i++) {
     threads[i].join();
-    total_time += times[i];
   }
   auto decode_end = high_resolution_clock::now();
   duration<double> decode_time = decode_end - decode_start;

From 30ea6b59384e13282caa4a16f111db6a4dfb2ad7 Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Mon, 2 Sep 2024 09:17:29 +0800
Subject: [PATCH 57/85] fix(bevdet.cpp): modify function naming

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 .../autoware/tensorrt_bevdet/bevdet.hpp       | 20 +++++-----
 .../autoware_tensorrt_bevdet/src/bevdet.cpp   | 38 +++++++++----------
 .../src/bevdet_node.cpp                       |  2 +-
 3 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp
index 3276901ff4fd2..2659d2737f151 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp
@@ -121,34 +121,34 @@ class BEVDet
     std::vector<Eigen::Translation3f> _cams2ego_trans, const std::string & onnx_file,
     const std::string & engine_file);
 
-  int DoInfer(
+  int doInfer(
     const camsData & cam_data, std::vector<Box> & out_detections, float & cost_time, int idx = -1);
   ~BEVDet();
 
 protected:
-  void InitParams(const std::string & config_file);
+  void initParams(const std::string & config_file);
 
-  void InitViewTransformer(
+  void initViewTransformer(
     std::shared_ptr<int> & ranks_bev_ptr, std::shared_ptr<int> & ranks_depth_ptr,
     std::shared_ptr<int> & ranks_feat_ptr, std::shared_ptr<int> & interval_starts_ptr,
     std::shared_ptr<int> & interval_lengths_ptr);
-  void ExportEngine(const std::string & onnxFile, const std::string & trtFile);
-  int InitEngine(const std::string & engine_file);
+  void exportEngine(const std::string & onnxFile, const std::string & trtFile);
+  int initEngine(const std::string & engine_file);
 
-  int DeserializeTRTEngine(const std::string & engine_file, nvinfer1::ICudaEngine ** engine_ptr);
+  int deserializeTRTEngine(const std::string & engine_file, nvinfer1::ICudaEngine ** engine_ptr);
 
-  void MallocDeviceMemory();
+  void mallocDeviceMemory();
 
-  void InitCamParams(
+  void initCamParams(
     const std::vector<Eigen::Quaternion<float>> & curr_cams2ego_rot,
     const std::vector<Eigen::Translation3f> & curr_cams2ego_trans,
     const std::vector<Eigen::Matrix3f> & cams_intrin);
 
-  void GetAdjBEVFeature(
+  void getAdjBEVFeature(
     const std::string & curr_scene_token, const Eigen::Quaternion<float> & ego2global_rot,
     const Eigen::Translation3f & ego2global_trans);
 
-  void GetCurr2AdjTransform(
+  void getCurr2AdjTransform(
     const Eigen::Quaternion<float> & curr_ego2global_rot,
     const Eigen::Quaternion<float> & adj_ego2global_rot,
     const Eigen::Translation3f & curr_ego2global_trans,
diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
index ed84bd6170733..30292624fded8 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
@@ -39,7 +39,7 @@ BEVDet::BEVDet(
   const std::string & engine_file)
 : cams_intrin(_cams_intrin), cams2ego_rot(_cams2ego_rot), cams2ego_trans(_cams2ego_trans)
 {
-  InitParams(config_file);
+  initParams(config_file);
   if (n_img != N_img) {
     RCLCPP_ERROR(
       rclcpp::get_logger("BEVDet"), "BEVDet need %d images, but given %d images!", N_img, n_img);
@@ -52,7 +52,7 @@ BEVDet::BEVDet(
   std::shared_ptr<int> interval_starts_ptr = nullptr;
   std::shared_ptr<int> interval_lengths_ptr = nullptr;
 
-  InitViewTransformer(
+  initViewTransformer(
     ranks_bev_ptr, ranks_depth_ptr, ranks_feat_ptr, interval_starts_ptr, interval_lengths_ptr);
   auto end = high_resolution_clock::now();
   duration<float> t = end - start;
@@ -66,10 +66,10 @@ BEVDet::BEVDet(
     RCLCPP_WARN(
       rclcpp::get_logger("BEVDet"), "Could not find %s, try making TensorRT engine from onnx",
       engine_file.c_str());
-    ExportEngine(onnx_file, engine_file);
+    exportEngine(onnx_file, engine_file);
   }
-  InitEngine(engine_file);  // FIXME
-  MallocDeviceMemory();
+  initEngine(engine_file);  // FIXME
+  mallocDeviceMemory();
 
   if (use_adj) {
     adj_frame_ptr.reset(new adjFrame(adj_num, trt_buffer_sizes[buffer_map["curr_bevfeat"]]));
@@ -99,7 +99,7 @@ BEVDet::BEVDet(
     trt_buffer_dev[buffer_map["std"]], std.data(), 3 * sizeof(float), cudaMemcpyHostToDevice));
 }
 
-void BEVDet::InitCamParams(
+void BEVDet::initCamParams(
   const std::vector<Eigen::Quaternion<float>> & curr_cams2ego_rot,
   const std::vector<Eigen::Translation3f> & curr_cams2ego_trans,
   const std::vector<Eigen::Matrix3f> & curr_cams_intrin)
@@ -138,7 +138,7 @@ void BEVDet::InitCamParams(
     trt_buffer_sizes[buffer_map["cam_params"]], cudaMemcpyHostToDevice));
 }
 
-void BEVDet::InitParams(const std::string & config_file)
+void BEVDet::initParams(const std::string & config_file)
 {
   mean = std::vector<float>(3);
   std = std::vector<float>(3);
@@ -229,7 +229,7 @@ void BEVDet::InitParams(const std::string & config_file)
     bev_h, bev_w, x_step, y_step, x_start, y_start, class_num_pre_task, nms_rescale_factor));
 }
 
-void BEVDet::MallocDeviceMemory()
+void BEVDet::mallocDeviceMemory()
 {
   trt_buffer_sizes.resize(trt_engine->getNbBindings());
   trt_buffer_dev = reinterpret_cast<void **>(new void *[trt_engine->getNbBindings()]);
@@ -259,7 +259,7 @@ void BEVDet::MallocDeviceMemory()
   return;
 }
 
-void BEVDet::InitViewTransformer(
+void BEVDet::initViewTransformer(
   std::shared_ptr<int> & ranks_bev_ptr, std::shared_ptr<int> & ranks_depth_ptr,
   std::shared_ptr<int> & ranks_feat_ptr, std::shared_ptr<int> & interval_starts_ptr,
   std::shared_ptr<int> & interval_lengths_ptr)
@@ -396,9 +396,9 @@ void print_dim(nvinfer1::Dims dim, const std::string & name)
   RCLCPP_INFO(rclcpp::get_logger("BEVDet"), "%s", oss.str().c_str());
 }
 
-int BEVDet::InitEngine(const std::string & engine_file)
+int BEVDet::initEngine(const std::string & engine_file)
 {
-  if (DeserializeTRTEngine(engine_file, &trt_engine)) {
+  if (deserializeTRTEngine(engine_file, &trt_engine)) {
     return EXIT_FAILURE;
   }
 
@@ -443,7 +443,7 @@ int BEVDet::InitEngine(const std::string & engine_file)
   return EXIT_SUCCESS;
 }
 
-int BEVDet::DeserializeTRTEngine(
+int BEVDet::deserializeTRTEngine(
   const std::string & engine_file, nvinfer1::ICudaEngine ** engine_ptr)
 {
   std::stringstream engine_stream;
@@ -485,7 +485,7 @@ int BEVDet::DeserializeTRTEngine(
   return EXIT_SUCCESS;
 }
 
-void BEVDet::GetAdjBEVFeature(
+void BEVDet::getAdjBEVFeature(
   const std::string & curr_scene_token, const Eigen::Quaternion<float> & ego2global_rot,
   const Eigen::Translation3f & ego2global_trans)
 {
@@ -509,7 +509,7 @@ void BEVDet::GetAdjBEVFeature(
     Eigen::Translation3f adj_ego2global_trans;
     adj_frame_ptr->getEgo2Global(i, adj_ego2global_rot, adj_ego2global_trans);
 
-    GetCurr2AdjTransform(
+    getCurr2AdjTransform(
       ego2global_rot, adj_ego2global_rot, ego2global_trans, adj_ego2global_trans,
       reinterpret_cast<float *>(trt_buffer_dev[buffer_map["transforms"]]) + i * transform_size);
   }
@@ -518,7 +518,7 @@ void BEVDet::GetAdjBEVFeature(
     cudaMemcpyHostToDevice));
 }
 
-void BEVDet::GetCurr2AdjTransform(
+void BEVDet::getCurr2AdjTransform(
   const Eigen::Quaternion<float> & curr_ego2global_rot,
   const Eigen::Quaternion<float> & adj_ego2global_rot,
   const Eigen::Translation3f & curr_ego2global_trans,
@@ -575,7 +575,7 @@ void BEVDet::GetCurr2AdjTransform(
     transform_size * sizeof(float), cudaMemcpyHostToDevice));
 }
 
-int BEVDet::DoInfer(
+int BEVDet::doInfer(
   const camsData & cam_data, std::vector<Box> & out_detections, float & cost_time, int idx)
 {
   RCLCPP_DEBUG(rclcpp::get_logger("BEVDet"), "---------%d---------", idx + 1);
@@ -585,10 +585,10 @@ int BEVDet::DoInfer(
     trt_buffer_dev[buffer_map["images"]], cam_data.imgs_dev, trt_buffer_sizes[buffer_map["images"]],
     cudaMemcpyDeviceToDevice));
 
-  InitCamParams(
+  initCamParams(
     cam_data.param.cams2ego_rot, cam_data.param.cams2ego_trans, cam_data.param.cams_intrin);
 
-  GetAdjBEVFeature(
+  getAdjBEVFeature(
     cam_data.param.scene_token, cam_data.param.ego2global_rot, cam_data.param.ego2global_trans);
 
   if (!trt_context->executeV2(trt_buffer_dev)) {
@@ -619,7 +619,7 @@ int BEVDet::DoInfer(
   return EXIT_SUCCESS;
 }
 
-void BEVDet::ExportEngine(const std::string & onnxFile, const std::string & trtFile)
+void BEVDet::exportEngine(const std::string & onnxFile, const std::string & trtFile)
 {
   CHECK_CUDA(cudaSetDevice(0));
   nvinfer1::ICudaEngine * engine = nullptr;
diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
index 3841658b97767..f0ea362e2b387 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
@@ -291,7 +291,7 @@ void TRTBEVDetNode::callback(
   ego_boxes.clear();
   float time = 0.f;
 
-  bevdet_->DoInfer(sampleData_, ego_boxes, time);
+  bevdet_->doInfer(sampleData_, ego_boxes, time);
 
   autoware_perception_msgs::msg::DetectedObjects bevdet_objects;
   bevdet_objects.header.frame_id = "base_link";

From b0634907c88045db58ca9b4c5e82317228267bd4 Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Mon, 2 Sep 2024 10:14:43 +0800
Subject: [PATCH 58/85] fix: add the namespace autoware::tensorrt_bevdet

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 .../include/autoware/tensorrt_bevdet/bevdet.hpp               | 4 +++-
 .../include/autoware/tensorrt_bevdet/bevdet_node.hpp          | 2 +-
 perception/autoware_tensorrt_bevdet/src/bevdet.cpp            | 3 +++
 perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp       | 2 +-
 4 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp
index 2659d2737f151..a4db4354c3fff 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp
@@ -31,6 +31,8 @@
 #include <string>
 #include <vector>
 
+namespace autoware::tensorrt_bevdet
+{
 class adjFrame
 {
 public:
@@ -235,5 +237,5 @@ class BEVDet
   std::unique_ptr<PostprocessGPU> postprocess_ptr;
   std::unique_ptr<adjFrame> adj_frame_ptr;
 };
-
+}//namespce autoware::tensorrt_bevdet
 #endif  // AUTOWARE__TENSORRT_BEVDET__BEVDET_HPP_
diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
index 7281570eafcac..6d8d7e1307030 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
@@ -94,7 +94,7 @@ class TRTBEVDetNode : public rclcpp::Node
   std::vector<std::string> class_names_;  ///< Names of the object classes
 
   camsData sampleData_;             ///< Sample data for camera parameters
-  std::shared_ptr<BEVDet> bevdet_;  ///< Object for performing object detection
+  std::shared_ptr<autoware::tensorrt_bevdet::BEVDet> bevdet_;  ///< Object for performing object detection
 
   uchar * imgs_dev_ = nullptr;  ///< Device pointer for storing the images
   float score_thre_;            ///< Score threshold for object detection
diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
index 30292624fded8..474db685d6e09 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
@@ -32,6 +32,8 @@
 using std::chrono::duration;
 using std::chrono::high_resolution_clock;
 
+namespace autoware::tensorrt_bevdet
+{
 BEVDet::BEVDet(
   const std::string & config_file, int n_img, std::vector<Eigen::Matrix3f> _cams_intrin,
   std::vector<Eigen::Quaternion<float>> _cams2ego_rot,
@@ -710,3 +712,4 @@ BEVDet::~BEVDet()
   trt_context->destroy();
   trt_engine->destroy();
 }
+}//namespace autoware::tensorrt_bevdet
\ No newline at end of file
diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
index f0ea362e2b387..18d8ab39cd801 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
@@ -159,7 +159,7 @@ void TRTBEVDetNode::initModel()
 
   RCLCPP_INFO_STREAM(this->get_logger(), "Successful load image params!");
 
-  bevdet_ = std::make_shared<BEVDet>(
+  bevdet_ = std::make_shared<autoware::tensorrt_bevdet::BEVDet>(
     model_config_, img_N_, sampleData_.param.cams_intrin, sampleData_.param.cams2ego_rot,
     sampleData_.param.cams2ego_trans, onnx_file_, engine_file_);
 

From 4e30fef36ec646e18b68fb2e47cdbde212f14d61 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 2 Sep 2024 02:17:55 +0000
Subject: [PATCH 59/85] style(pre-commit): autofix

---
 .../include/autoware/tensorrt_bevdet/bevdet.hpp              | 2 +-
 .../include/autoware/tensorrt_bevdet/bevdet_node.hpp         | 5 +++--
 perception/autoware_tensorrt_bevdet/src/bevdet.cpp           | 2 +-
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp
index a4db4354c3fff..d044ad0d5ca29 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp
@@ -237,5 +237,5 @@ class BEVDet
   std::unique_ptr<PostprocessGPU> postprocess_ptr;
   std::unique_ptr<adjFrame> adj_frame_ptr;
 };
-}//namespce autoware::tensorrt_bevdet
+}  // namespace autoware::tensorrt_bevdet
 #endif  // AUTOWARE__TENSORRT_BEVDET__BEVDET_HPP_
diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
index 6d8d7e1307030..80b22420e2f58 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
@@ -93,8 +93,9 @@ class TRTBEVDetNode : public rclcpp::Node
   std::vector<std::string> imgs_name_;    ///< Names of the images
   std::vector<std::string> class_names_;  ///< Names of the object classes
 
-  camsData sampleData_;             ///< Sample data for camera parameters
-  std::shared_ptr<autoware::tensorrt_bevdet::BEVDet> bevdet_;  ///< Object for performing object detection
+  camsData sampleData_;  ///< Sample data for camera parameters
+  std::shared_ptr<autoware::tensorrt_bevdet::BEVDet>
+    bevdet_;  ///< Object for performing object detection
 
   uchar * imgs_dev_ = nullptr;  ///< Device pointer for storing the images
   float score_thre_;            ///< Score threshold for object detection
diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
index 474db685d6e09..7361a047e8a51 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
@@ -712,4 +712,4 @@ BEVDet::~BEVDet()
   trt_context->destroy();
   trt_engine->destroy();
 }
-}//namespace autoware::tensorrt_bevdet
\ No newline at end of file
+}  // namespace autoware::tensorrt_bevdet

From 4946e5587965bea327f539f5affa87139e5cdd4e Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Mon, 2 Sep 2024 11:04:07 +0800
Subject: [PATCH 60/85] trying fix cppcheck-differential and add maintainer

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 perception/autoware_tensorrt_bevdet/package.xml           | 2 +-
 perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/package.xml b/perception/autoware_tensorrt_bevdet/package.xml
index a3ad3bfc30dd6..d73a11cf6bad9 100644
--- a/perception/autoware_tensorrt_bevdet/package.xml
+++ b/perception/autoware_tensorrt_bevdet/package.xml
@@ -6,7 +6,7 @@
 
   <author email="cynthia.liu@autocore.ai">Cynthia</author>
   <maintainer email="cynthia.liu@autocore.ai">Cynthia</maintainer>
-
+  <maintainer email="yliuhb@connect.ust.hk">Yuxuan Liu</maintainer>
   <license>Apache License 2.0</license>
 
   <buildtool_depend>ament_cmake_auto</buildtool_depend>
diff --git a/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp b/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp
index 3156e362dabe8..9a0deba682ccd 100644
--- a/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp
@@ -192,7 +192,8 @@ void decode_single_image(
     params.jpeg_streams[buffer_index], params.stream));
 
   buffer_index = 1 - buffer_index;  // switch pinned buffer in pipeline mode to avoid an extra sync
-
+  RCLCPP_DEBUG(rclcpp::get_logger("nvjpegDecoder"), buffer_index);
+  
   CHECK_NVJPEG(nvjpegDecodeJpegDevice(
     share_param.nvjpeg_handle, params.nvjpeg_decoder, params.nvjpeg_decoupled_state, &out,
     params.stream));

From 7127f09af5d85d700ea8e379ad996286cb3c804d Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 2 Sep 2024 03:06:33 +0000
Subject: [PATCH 61/85] style(pre-commit): autofix

---
 perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp b/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp
index 9a0deba682ccd..7428c997e94e9 100644
--- a/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp
@@ -193,7 +193,7 @@ void decode_single_image(
 
   buffer_index = 1 - buffer_index;  // switch pinned buffer in pipeline mode to avoid an extra sync
   RCLCPP_DEBUG(rclcpp::get_logger("nvjpegDecoder"), buffer_index);
-  
+
   CHECK_NVJPEG(nvjpegDecodeJpegDevice(
     share_param.nvjpeg_handle, params.nvjpeg_decoder, params.nvjpeg_decoupled_state, &out,
     params.stream));

From f9bbd0943a5309453ddfb61ac6481cb53878cc50 Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Tue, 3 Sep 2024 13:36:06 +0800
Subject: [PATCH 62/85] fix: git H and W from CameraInfo and replace uint32_t
 with int

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 .../include/autoware/tensorrt_bevdet/bevdet_node.hpp       | 5 +++--
 perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp    | 7 +++++--
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
index 80b22420e2f58..62034bb3e7142 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
@@ -82,8 +82,8 @@ class TRTBEVDetNode : public rclcpp::Node
    */
 private:
   size_t img_N_;  ///< Number of images
-  int img_w_;     ///< Width of the images
-  int img_h_;     ///< Height of the images
+  uint32_t img_w_;     ///< Width of the images
+  uint32_t img_h_;     ///< Height of the images
 
   std::string model_config_;  ///< Path to the model configuration file
 
@@ -120,6 +120,7 @@ class TRTBEVDetNode : public rclcpp::Node
     caminfo_received_;  ///< Flag indicating if camera info has been received for each camera
   bool camera_info_received_flag_ =
     false;  ///< Flag indicating if camera info has been received for all cameras
+  bool initialized_ = false; ///Flag indicating if img_w_ and img_h_ has been initialized
 
   // tf listener
   std::shared_ptr<tf2_ros::TransformListener> tf_listener_{
diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
index 18d8ab39cd801..8b78542920eee 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
@@ -122,8 +122,6 @@ TRTBEVDetNode::TRTBEVDetNode(
 : rclcpp::Node(node_name, node_options)
 {  // Only start camera info subscription and tf listener at the beginning
   img_N_ = this->declare_parameter<int>("data_params.N", 6);     // camera num 6
-  img_w_ = this->declare_parameter<int>("data_params.W", 1600);  // W: 1600
-  img_h_ = this->declare_parameter<int>("data_params.H", 900);   // H: 900
 
   caminfo_received_ = std::vector<bool>(img_N_, false);
   cams_intrin = std::vector<Eigen::Matrix3f>(img_N_);
@@ -307,6 +305,11 @@ void TRTBEVDetNode::camera_info_callback(int idx, const sensor_msgs::msg::Camera
   if (caminfo_received_[idx])
     return;  // already received;  not expected to modify because of we init the model only once
 
+    if(!initialized_){ //get image width and height
+      img_w_ = msg->width;
+      img_h_ = msg->height;
+      initialized_ = true;
+    }
   Eigen::Matrix3f intrinsics;
   getCameraIntrinsics(msg, intrinsics);
   cams_intrin[idx] = intrinsics;

From 534b1c219e6cdfa58316b8c2c8ac08fd4964a531 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 3 Sep 2024 05:39:06 +0000
Subject: [PATCH 63/85] style(pre-commit): autofix

---
 .../include/autoware/tensorrt_bevdet/bevdet_node.hpp | 10 +++++-----
 .../autoware_tensorrt_bevdet/src/bevdet_node.cpp     | 12 ++++++------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
index 62034bb3e7142..1a493bb2f8889 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
@@ -81,9 +81,9 @@ class TRTBEVDetNode : public rclcpp::Node
    * (Bird's Eye View) images.
    */
 private:
-  size_t img_N_;  ///< Number of images
-  uint32_t img_w_;     ///< Width of the images
-  uint32_t img_h_;     ///< Height of the images
+  size_t img_N_;    ///< Number of images
+  uint32_t img_w_;  ///< Width of the images
+  uint32_t img_h_;  ///< Height of the images
 
   std::string model_config_;  ///< Path to the model configuration file
 
@@ -119,8 +119,8 @@ class TRTBEVDetNode : public rclcpp::Node
   std::vector<bool>
     caminfo_received_;  ///< Flag indicating if camera info has been received for each camera
   bool camera_info_received_flag_ =
-    false;  ///< Flag indicating if camera info has been received for all cameras
-  bool initialized_ = false; ///Flag indicating if img_w_ and img_h_ has been initialized
+    false;                    ///< Flag indicating if camera info has been received for all cameras
+  bool initialized_ = false;  /// Flag indicating if img_w_ and img_h_ has been initialized
 
   // tf listener
   std::shared_ptr<tf2_ros::TransformListener> tf_listener_{
diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
index 8b78542920eee..46bac289cd501 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
@@ -121,7 +121,7 @@ TRTBEVDetNode::TRTBEVDetNode(
   const std::string & node_name, const rclcpp::NodeOptions & node_options)
 : rclcpp::Node(node_name, node_options)
 {  // Only start camera info subscription and tf listener at the beginning
-  img_N_ = this->declare_parameter<int>("data_params.N", 6);     // camera num 6
+  img_N_ = this->declare_parameter<int>("data_params.N", 6);  // camera num 6
 
   caminfo_received_ = std::vector<bool>(img_N_, false);
   cams_intrin = std::vector<Eigen::Matrix3f>(img_N_);
@@ -305,11 +305,11 @@ void TRTBEVDetNode::camera_info_callback(int idx, const sensor_msgs::msg::Camera
   if (caminfo_received_[idx])
     return;  // already received;  not expected to modify because of we init the model only once
 
-    if(!initialized_){ //get image width and height
-      img_w_ = msg->width;
-      img_h_ = msg->height;
-      initialized_ = true;
-    }
+  if (!initialized_) {  // get image width and height
+    img_w_ = msg->width;
+    img_h_ = msg->height;
+    initialized_ = true;
+  }
   Eigen::Matrix3f intrinsics;
   getCameraIntrinsics(msg, intrinsics);
   cams_intrin[idx] = intrinsics;

From c6161c4ae2546d115d7abf11ac369a6c47babdb5 Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Wed, 4 Sep 2024 11:30:23 +0800
Subject: [PATCH 64/85] fix: change the class name to UpperCamelCase style

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 .../include/autoware/tensorrt_bevdet/bevdet.hpp        | 10 +++++-----
 perception/autoware_tensorrt_bevdet/src/bevdet.cpp     |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp
index d044ad0d5ca29..3a00385301d07 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp
@@ -33,11 +33,11 @@
 
 namespace autoware::tensorrt_bevdet
 {
-class adjFrame
+class AdjFrame
 {
 public:
-  adjFrame() {}
-  adjFrame(int _n, size_t _buf_size)
+  AdjFrame() {}
+  AdjFrame(int _n, size_t _buf_size)
   : n(_n), buf_size(_buf_size), scenes_token(_n), ego2global_rot(_n), ego2global_trans(_n)
   {
     CHECK_CUDA(cudaMalloc(reinterpret_cast<void **>(&adj_buffer), _n * _buf_size));
@@ -96,7 +96,7 @@ class adjFrame
     adj_ego2global_trans = ego2global_trans[idx];
   }
 
-  ~adjFrame() { CHECK_CUDA(cudaFree(adj_buffer)); }
+  ~AdjFrame() { CHECK_CUDA(cudaFree(adj_buffer)); }
 
 private:
   int n;
@@ -235,7 +235,7 @@ class BEVDet
   nvinfer1::IExecutionContext * trt_context;
 
   std::unique_ptr<PostprocessGPU> postprocess_ptr;
-  std::unique_ptr<adjFrame> adj_frame_ptr;
+  std::unique_ptr<AdjFrame> adj_frame_ptr;
 };
 }  // namespace autoware::tensorrt_bevdet
 #endif  // AUTOWARE__TENSORRT_BEVDET__BEVDET_HPP_
diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
index 7361a047e8a51..de4557aaf3373 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
@@ -74,7 +74,7 @@ BEVDet::BEVDet(
   mallocDeviceMemory();
 
   if (use_adj) {
-    adj_frame_ptr.reset(new adjFrame(adj_num, trt_buffer_sizes[buffer_map["curr_bevfeat"]]));
+    adj_frame_ptr.reset(new AdjFrame(adj_num, trt_buffer_sizes[buffer_map["curr_bevfeat"]]));
   }
 
   cam_params_host = new float[N_img * cam_params_size];

From 47b7349f5b76b09ca853c7aac4d5ee4c31a077c1 Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Wed, 4 Sep 2024 14:50:14 +0800
Subject: [PATCH 65/85] fix: add _ suffix to member variables in Class AdjFrame

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 .../autoware/tensorrt_bevdet/bevdet.hpp       | 74 +++++++++----------
 1 file changed, 37 insertions(+), 37 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp
index 3a00385301d07..df173b2aea82c 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp
@@ -37,78 +37,78 @@ class AdjFrame
 {
 public:
   AdjFrame() {}
-  AdjFrame(int _n, size_t _buf_size)
-  : n(_n), buf_size(_buf_size), scenes_token(_n), ego2global_rot(_n), ego2global_trans(_n)
+  AdjFrame(int n, size_t buf_size)
+  : n_(n), buf_size_(buf_size), scenes_token_(n), ego2global_rot_(n), ego2global_trans_(n)
   {
-    CHECK_CUDA(cudaMalloc(reinterpret_cast<void **>(&adj_buffer), _n * _buf_size));
-    CHECK_CUDA(cudaMemset(adj_buffer, 0, _n * _buf_size));
-    last = 0;
-    buffer_num = 0;
-    init = false;
+    CHECK_CUDA(cudaMalloc(reinterpret_cast<void **>(&adj_buffer), n * buf_size));
+    CHECK_CUDA(cudaMemset(adj_buffer, 0, n * buf_size));
+    last_ = 0;
+    buffer_num_ = 0;
+    init_ = false;
 
-    for (auto & rot : ego2global_rot) {
+    for (auto & rot : ego2global_rot_) {
       rot = Eigen::Quaternion<float>(0.f, 0.f, 0.f, 0.f);
     }
-    for (auto & trans : ego2global_trans) {
+    for (auto & trans : ego2global_trans_) {
       trans = Eigen::Translation3f(0.f, 0.f, 0.f);
     }
   }
-  const std::string & lastScenesToken() const { return scenes_token[last]; }
+  const std::string & lastScenesToken() const { return scenes_token_[last_]; }
 
   void reset()
   {
-    last = 0;  // origin -1
-    buffer_num = 0;
-    init = false;
+    last_ = 0;  // origin -1
+    buffer_num_ = 0;
+    init_ = false;
   }
 
   void saveFrameBuffer(
     const void * curr_buffer, const std::string & curr_token,
-    const Eigen::Quaternion<float> & _ego2global_rot,
-    const Eigen::Translation3f & _ego2global_trans)
+    const Eigen::Quaternion<float> & ego2global_rot,
+    const Eigen::Translation3f & ego2global_trans)
   {
-    int iters = init ? 1 : n;
+    int iters = init_ ? 1 : n_;
     while (iters--) {
-      last = (last + 1) % n;
+      last_ = (last_ + 1) % n_;
       CHECK_CUDA(cudaMemcpy(
-        reinterpret_cast<char *>(adj_buffer) + last * buf_size, curr_buffer, buf_size,
+        reinterpret_cast<char *>(adj_buffer) + last_ * buf_size_, curr_buffer, buf_size_,
         cudaMemcpyDeviceToDevice));
-      scenes_token[last] = curr_token;
-      ego2global_rot[last] = _ego2global_rot;
-      ego2global_trans[last] = _ego2global_trans;
-      buffer_num = std::min(buffer_num + 1, n);
+      scenes_token_[last_] = curr_token;
+      ego2global_rot_[last_] = ego2global_rot;
+      ego2global_trans_[last_] = ego2global_trans;
+      buffer_num_ = std::min(buffer_num_ + 1, n_);
     }
-    init = true;
+    init_ = true;
   }
-  int havingBuffer(int idx) { return static_cast<int>(idx < buffer_num); }
+  int havingBuffer(int idx) { return static_cast<int>(idx < buffer_num_); }
 
   const void * getFrameBuffer(int idx)
   {
-    idx = (-idx + last + n) % n;
-    return reinterpret_cast<char *>(adj_buffer) + idx * buf_size;
+    idx = (-idx + last_ + n_) % n_;
+    return reinterpret_cast<char *>(adj_buffer) + idx * buf_size_;
   }
   void getEgo2Global(
     int idx, Eigen::Quaternion<float> & adj_ego2global_rot,
     Eigen::Translation3f & adj_ego2global_trans)
   {
-    idx = (-idx + last + n) % n;
-    adj_ego2global_rot = ego2global_rot[idx];
-    adj_ego2global_trans = ego2global_trans[idx];
+    idx = (-idx + last_ + n_) % n_;
+    adj_ego2global_rot = ego2global_rot_[idx];
+    adj_ego2global_trans = ego2global_trans_[idx];
   }
 
   ~AdjFrame() { CHECK_CUDA(cudaFree(adj_buffer)); }
 
 private:
-  int n;
-  size_t buf_size;
+  int n_;
+  size_t buf_size_;
 
-  int last;
-  int buffer_num;
-  bool init;
+  int last_;
+  int buffer_num_;
+  bool init_;
 
-  std::vector<std::string> scenes_token;
-  std::vector<Eigen::Quaternion<float>> ego2global_rot;
-  std::vector<Eigen::Translation3f> ego2global_trans;
+  std::vector<std::string> scenes_token_;
+  std::vector<Eigen::Quaternion<float>> ego2global_rot_;
+  std::vector<Eigen::Translation3f> ego2global_trans_;
 
   void * adj_buffer;
 };

From 3a5e1c194fc69bf8e73fa4d4f5ee1a7b0e32260a Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 4 Sep 2024 06:55:35 +0000
Subject: [PATCH 66/85] style(pre-commit): autofix

---
 .../include/autoware/tensorrt_bevdet/bevdet.hpp                | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp
index df173b2aea82c..b7fb9a60ec0a8 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp
@@ -64,8 +64,7 @@ class AdjFrame
 
   void saveFrameBuffer(
     const void * curr_buffer, const std::string & curr_token,
-    const Eigen::Quaternion<float> & ego2global_rot,
-    const Eigen::Translation3f & ego2global_trans)
+    const Eigen::Quaternion<float> & ego2global_rot, const Eigen::Translation3f & ego2global_trans)
   {
     int iters = init_ ? 1 : n_;
     while (iters--) {

From f4a71fb7bc8106f57aaeda0fdc48e7cd0173c6b5 Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Tue, 10 Sep 2024 17:45:05 +0800
Subject: [PATCH 67/85] fix: add _ suffix to member variables in Class BEVDet

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 .../autoware/tensorrt_bevdet/bevdet.hpp       | 164 +++----
 .../autoware_tensorrt_bevdet/src/bevdet.cpp   | 432 +++++++++---------
 2 files changed, 298 insertions(+), 298 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp
index b7fb9a60ec0a8..4cc4c43f1cbc7 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp
@@ -117,9 +117,9 @@ class BEVDet
 public:
   BEVDet() {}
   BEVDet(
-    const std::string & config_file, int n_img, std::vector<Eigen::Matrix3f> _cams_intrin,
-    std::vector<Eigen::Quaternion<float>> _cams2ego_rot,
-    std::vector<Eigen::Translation3f> _cams2ego_trans, const std::string & onnx_file,
+    const std::string & config_file, int n_img, std::vector<Eigen::Matrix3f> cams_intrin,
+    std::vector<Eigen::Quaternion<float>> cams2ego_rot,
+    std::vector<Eigen::Translation3f> cams2ego_trans, const std::string & onnx_file,
     const std::string & engine_file);
 
   int doInfer(
@@ -156,85 +156,85 @@ class BEVDet
     const Eigen::Translation3f & adj_ego2global_trans, float * transform_dev);
 
 private:
-  int N_img;
-
-  int src_img_h;
-  int src_img_w;
-  int input_img_h;
-  int input_img_w;
-  int crop_h;
-  int crop_w;
-  float resize_radio;
-  int down_sample;
-  int feat_h;
-  int feat_w;
-  int bev_h;
-  int bev_w;
-  int bevpool_channel;
-
-  float depth_start;
-  float depth_end;
-  float depth_step;
-  int depth_num;
-
-  float x_start;
-  float x_end;
-  float x_step;
-  int xgrid_num;
-
-  float y_start;
-  float y_end;
-  float y_step;
-  int ygrid_num;
-
-  float z_start;
-  float z_end;
-  float z_step;
-  int zgrid_num;
-
-  std::vector<float> mean;
-  std::vector<float> std;
-
-  bool use_depth;
-  bool use_adj;
-  int adj_num;
-
-  int class_num;
-  float score_thresh;
-  float nms_overlap_thresh;
-  int nms_pre_maxnum;
-  int nms_post_maxnum;
-  std::vector<float> nms_rescale_factor;
-  std::vector<int> class_num_pre_task;
-  std::map<std::string, int> out_num_task_head;
-
-  std::vector<Eigen::Matrix3f> cams_intrin;
-  std::vector<Eigen::Quaternion<float>> cams2ego_rot;
-  std::vector<Eigen::Translation3f> cams2ego_trans;
-
-  Eigen::Matrix3f post_rot;
-  Eigen::Translation3f post_trans;
-
-  std::vector<size_t> trt_buffer_sizes;
-  void ** trt_buffer_dev;
-  float * cam_params_host;
-  void ** post_buffer;
-
-  std::map<std::string, int> buffer_map;
-
-  int valid_feat_num;
-  int unique_bev_num;
-
-  int transform_size;
-  int cam_params_size;
-
-  Logger g_logger;
-
-  nvinfer1::ICudaEngine * trt_engine;
-  nvinfer1::IExecutionContext * trt_context;
-
-  std::unique_ptr<PostprocessGPU> postprocess_ptr;
-  std::unique_ptr<AdjFrame> adj_frame_ptr;
+  int n_img_;
+
+  int src_img_h_;
+  int src_img_w_;
+  int input_img_h_;
+  int input_img_w_;
+  int crop_h_;
+  int crop_w_;
+  float resize_radio_;
+  int down_sample_;
+  int feat_h_;
+  int feat_w_;
+  int bev_h_;
+  int bev_w_;
+  int bevpool_channel_;
+
+  float depth_start_;
+  float depth_end_;
+  float depth_step_;
+  int depth_num_;
+
+  float x_start_;
+  float x_end_;
+  float x_step_;
+  int xgrid_num_;
+
+  float y_start_;
+  float y_end_;
+  float y_step_;
+  int ygrid_num_;
+
+  float z_start_;
+  float z_end_;
+  float z_step_;
+  int zgrid_num_;
+
+  std::vector<float> mean_;
+  std::vector<float> std_;
+
+  bool use_depth_;
+  bool use_adj_;
+  int adj_num_;
+
+  int class_num_;
+  float score_thresh_;
+  float nms_overlap_thresh_;
+  int nms_pre_maxnum_;
+  int nms_post_maxnum_;
+  std::vector<float> nms_rescale_factor_;
+  std::vector<int> class_num_pre_task_;
+  std::map<std::string, int> out_num_task_head_;
+
+  std::vector<Eigen::Matrix3f> cams_intrin_;
+  std::vector<Eigen::Quaternion<float>> cams2ego_rot_;
+  std::vector<Eigen::Translation3f> cams2ego_trans_;
+
+  Eigen::Matrix3f post_rot_;
+  Eigen::Translation3f post_trans_;
+
+  std::vector<size_t> trt_buffer_sizes_;
+  void ** trt_buffer_dev_;
+  float * cam_params_host_;
+  void ** post_buffer_;
+
+  std::map<std::string, int> buffer_map_;
+
+  int valid_feat_num_;
+  int unique_bev_num_;
+
+  int transform_size_;
+  int cam_params_size_;
+
+  Logger g_logger_;
+
+  nvinfer1::ICudaEngine * trt_engine_;
+  nvinfer1::IExecutionContext * trt_context_;
+
+  std::unique_ptr<PostprocessGPU> postprocess_ptr_;
+  std::unique_ptr<AdjFrame> adj_frame_ptr_;
 };
 }  // namespace autoware::tensorrt_bevdet
 #endif  // AUTOWARE__TENSORRT_BEVDET__BEVDET_HPP_
diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
index de4557aaf3373..2da18e0402c60 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
@@ -35,16 +35,16 @@ using std::chrono::high_resolution_clock;
 namespace autoware::tensorrt_bevdet
 {
 BEVDet::BEVDet(
-  const std::string & config_file, int n_img, std::vector<Eigen::Matrix3f> _cams_intrin,
-  std::vector<Eigen::Quaternion<float>> _cams2ego_rot,
-  std::vector<Eigen::Translation3f> _cams2ego_trans, const std::string & onnx_file,
+  const std::string & config_file, int n_img, std::vector<Eigen::Matrix3f> cams_intrin,
+  std::vector<Eigen::Quaternion<float>> cams2ego_rot,
+  std::vector<Eigen::Translation3f> cams2ego_trans, const std::string & onnx_file,
   const std::string & engine_file)
-: cams_intrin(_cams_intrin), cams2ego_rot(_cams2ego_rot), cams2ego_trans(_cams2ego_trans)
+: cams_intrin_(cams_intrin), cams2ego_rot_(cams2ego_rot), cams2ego_trans_(cams2ego_trans)
 {
   initParams(config_file);
-  if (n_img != N_img) {
+  if (n_img != n_img_) {
     RCLCPP_ERROR(
-      rclcpp::get_logger("BEVDet"), "BEVDet need %d images, but given %d images!", N_img, n_img);
+      rclcpp::get_logger("BEVDet"), "BEVDet need %d images, but given %d images!", n_img_, n_img);
   }
   auto start = high_resolution_clock::now();
 
@@ -73,32 +73,32 @@ BEVDet::BEVDet(
   initEngine(engine_file);  // FIXME
   mallocDeviceMemory();
 
-  if (use_adj) {
-    adj_frame_ptr.reset(new AdjFrame(adj_num, trt_buffer_sizes[buffer_map["curr_bevfeat"]]));
+  if (use_adj_) {
+    adj_frame_ptr_.reset(new AdjFrame(adj_num_, trt_buffer_sizes_[buffer_map_["curr_bevfeat"]]));
   }
 
-  cam_params_host = new float[N_img * cam_params_size];
+  cam_params_host_ = new float[n_img_ * cam_params_size_];
 
   CHECK_CUDA(cudaMemcpy(
-    trt_buffer_dev[buffer_map["ranks_bev"]], ranks_bev_ptr.get(), valid_feat_num * sizeof(int),
+    trt_buffer_dev_[buffer_map_["ranks_bev"]], ranks_bev_ptr.get(), valid_feat_num_ * sizeof(int),
     cudaMemcpyHostToDevice));
   CHECK_CUDA(cudaMemcpy(
-    trt_buffer_dev[buffer_map["ranks_depth"]], ranks_depth_ptr.get(), valid_feat_num * sizeof(int),
+    trt_buffer_dev_[buffer_map_["ranks_depth"]], ranks_depth_ptr.get(), valid_feat_num_ * sizeof(int),
     cudaMemcpyHostToDevice));
   CHECK_CUDA(cudaMemcpy(
-    trt_buffer_dev[buffer_map["ranks_feat"]], ranks_feat_ptr.get(), valid_feat_num * sizeof(int),
+    trt_buffer_dev_[buffer_map_["ranks_feat"]], ranks_feat_ptr.get(), valid_feat_num_ * sizeof(int),
     cudaMemcpyHostToDevice));
   CHECK_CUDA(cudaMemcpy(
-    trt_buffer_dev[buffer_map["interval_starts"]], interval_starts_ptr.get(),
-    unique_bev_num * sizeof(int), cudaMemcpyHostToDevice));
+    trt_buffer_dev_[buffer_map_["interval_starts"]], interval_starts_ptr.get(),
+    unique_bev_num_ * sizeof(int), cudaMemcpyHostToDevice));
   CHECK_CUDA(cudaMemcpy(
-    trt_buffer_dev[buffer_map["interval_lengths"]], interval_lengths_ptr.get(),
-    unique_bev_num * sizeof(int), cudaMemcpyHostToDevice));
+    trt_buffer_dev_[buffer_map_["interval_lengths"]], interval_lengths_ptr.get(),
+    unique_bev_num_ * sizeof(int), cudaMemcpyHostToDevice));
 
   CHECK_CUDA(cudaMemcpy(
-    trt_buffer_dev[buffer_map["mean"]], mean.data(), 3 * sizeof(float), cudaMemcpyHostToDevice));
+    trt_buffer_dev_[buffer_map_["mean"]], mean_.data(), 3 * sizeof(float), cudaMemcpyHostToDevice));
   CHECK_CUDA(cudaMemcpy(
-    trt_buffer_dev[buffer_map["std"]], std.data(), 3 * sizeof(float), cudaMemcpyHostToDevice));
+    trt_buffer_dev_[buffer_map_["std"]], std_.data(), 3 * sizeof(float), cudaMemcpyHostToDevice));
 }
 
 void BEVDet::initCamParams(
@@ -106,156 +106,156 @@ void BEVDet::initCamParams(
   const std::vector<Eigen::Translation3f> & curr_cams2ego_trans,
   const std::vector<Eigen::Matrix3f> & curr_cams_intrin)
 {
-  for (int i = 0; i < N_img; i++) {
-    cam_params_host[i * cam_params_size + 0] = curr_cams_intrin[i](0, 0);
-    cam_params_host[i * cam_params_size + 1] = curr_cams_intrin[i](1, 1);
-    cam_params_host[i * cam_params_size + 2] = curr_cams_intrin[i](0, 2);
-    cam_params_host[i * cam_params_size + 3] = curr_cams_intrin[i](1, 2);
-    cam_params_host[i * cam_params_size + 4] = post_rot(0, 0);
-    cam_params_host[i * cam_params_size + 5] = post_rot(0, 1);
-    cam_params_host[i * cam_params_size + 6] = post_trans.translation()(0);
-    cam_params_host[i * cam_params_size + 7] = post_rot(1, 0);
-    cam_params_host[i * cam_params_size + 8] = post_rot(1, 1);
-    cam_params_host[i * cam_params_size + 9] = post_trans.translation()(1);
-    cam_params_host[i * cam_params_size + 10] = 1.f;  // bda 0 0
-    cam_params_host[i * cam_params_size + 11] = 0.f;  // bda 0 1
-    cam_params_host[i * cam_params_size + 12] = 0.f;  // bda 1 0
-    cam_params_host[i * cam_params_size + 13] = 1.f;  // bda 1 1
-    cam_params_host[i * cam_params_size + 14] = 1.f;  // bda 2 2
-    cam_params_host[i * cam_params_size + 15] = curr_cams2ego_rot[i].matrix()(0, 0);
-    cam_params_host[i * cam_params_size + 16] = curr_cams2ego_rot[i].matrix()(0, 1);
-    cam_params_host[i * cam_params_size + 17] = curr_cams2ego_rot[i].matrix()(0, 2);
-    cam_params_host[i * cam_params_size + 18] = curr_cams2ego_trans[i].translation()(0);
-    cam_params_host[i * cam_params_size + 19] = curr_cams2ego_rot[i].matrix()(1, 0);
-    cam_params_host[i * cam_params_size + 20] = curr_cams2ego_rot[i].matrix()(1, 1);
-    cam_params_host[i * cam_params_size + 21] = curr_cams2ego_rot[i].matrix()(1, 2);
-    cam_params_host[i * cam_params_size + 22] = curr_cams2ego_trans[i].translation()(1);
-    cam_params_host[i * cam_params_size + 23] = curr_cams2ego_rot[i].matrix()(2, 0);
-    cam_params_host[i * cam_params_size + 24] = curr_cams2ego_rot[i].matrix()(2, 1);
-    cam_params_host[i * cam_params_size + 25] = curr_cams2ego_rot[i].matrix()(2, 2);
-    cam_params_host[i * cam_params_size + 26] = curr_cams2ego_trans[i].translation()(2);
+  for (int i = 0; i < n_img_; i++) {
+    cam_params_host_[i * cam_params_size_ + 0] = curr_cams_intrin[i](0, 0);
+    cam_params_host_[i * cam_params_size_ + 1] = curr_cams_intrin[i](1, 1);
+    cam_params_host_[i * cam_params_size_ + 2] = curr_cams_intrin[i](0, 2);
+    cam_params_host_[i * cam_params_size_ + 3] = curr_cams_intrin[i](1, 2);
+    cam_params_host_[i * cam_params_size_ + 4] = post_rot_(0, 0);
+    cam_params_host_[i * cam_params_size_ + 5] = post_rot_(0, 1);
+    cam_params_host_[i * cam_params_size_ + 6] = post_trans_.translation()(0);
+    cam_params_host_[i * cam_params_size_ + 7] = post_rot_(1, 0);
+    cam_params_host_[i * cam_params_size_ + 8] = post_rot_(1, 1);
+    cam_params_host_[i * cam_params_size_ + 9] = post_trans_.translation()(1);
+    cam_params_host_[i * cam_params_size_ + 10] = 1.f;  // bda 0 0
+    cam_params_host_[i * cam_params_size_ + 11] = 0.f;  // bda 0 1
+    cam_params_host_[i * cam_params_size_ + 12] = 0.f;  // bda 1 0
+    cam_params_host_[i * cam_params_size_ + 13] = 1.f;  // bda 1 1
+    cam_params_host_[i * cam_params_size_ + 14] = 1.f;  // bda 2 2
+    cam_params_host_[i * cam_params_size_ + 15] = curr_cams2ego_rot[i].matrix()(0, 0);
+    cam_params_host_[i * cam_params_size_ + 16] = curr_cams2ego_rot[i].matrix()(0, 1);
+    cam_params_host_[i * cam_params_size_ + 17] = curr_cams2ego_rot[i].matrix()(0, 2);
+    cam_params_host_[i * cam_params_size_ + 18] = curr_cams2ego_trans[i].translation()(0);
+    cam_params_host_[i * cam_params_size_ + 19] = curr_cams2ego_rot[i].matrix()(1, 0);
+    cam_params_host_[i * cam_params_size_ + 20] = curr_cams2ego_rot[i].matrix()(1, 1);
+    cam_params_host_[i * cam_params_size_ + 21] = curr_cams2ego_rot[i].matrix()(1, 2);
+    cam_params_host_[i * cam_params_size_ + 22] = curr_cams2ego_trans[i].translation()(1);
+    cam_params_host_[i * cam_params_size_ + 23] = curr_cams2ego_rot[i].matrix()(2, 0);
+    cam_params_host_[i * cam_params_size_ + 24] = curr_cams2ego_rot[i].matrix()(2, 1);
+    cam_params_host_[i * cam_params_size_ + 25] = curr_cams2ego_rot[i].matrix()(2, 2);
+    cam_params_host_[i * cam_params_size_ + 26] = curr_cams2ego_trans[i].translation()(2);
   }
   CHECK_CUDA(cudaMemcpy(
-    trt_buffer_dev[buffer_map["cam_params"]], cam_params_host,
-    trt_buffer_sizes[buffer_map["cam_params"]], cudaMemcpyHostToDevice));
+    trt_buffer_dev_[buffer_map_["cam_params"]], cam_params_host_,
+    trt_buffer_sizes_[buffer_map_["cam_params"]], cudaMemcpyHostToDevice));
 }
 
 void BEVDet::initParams(const std::string & config_file)
 {
-  mean = std::vector<float>(3);
-  std = std::vector<float>(3);
+  mean_ = std::vector<float>(3);
+  std_ = std::vector<float>(3);
 
   YAML::Node model_config = YAML::LoadFile(config_file);
-  N_img = model_config["data_config"]["Ncams"].as<int>();
-  src_img_h = model_config["data_config"]["src_size"][0].as<int>();
-  src_img_w = model_config["data_config"]["src_size"][1].as<int>();
-  input_img_h = model_config["data_config"]["input_size"][0].as<int>();
-  input_img_w = model_config["data_config"]["input_size"][1].as<int>();
-  resize_radio = model_config["data_config"]["resize_radio"].as<float>();
-  crop_h = model_config["data_config"]["crop"][0].as<int>();
-  crop_w = model_config["data_config"]["crop"][1].as<int>();
-  mean[0] = model_config["mean"][0].as<float>();
-  mean[1] = model_config["mean"][1].as<float>();
-  mean[2] = model_config["mean"][2].as<float>();
-  std[0] = model_config["std"][0].as<float>();
-  std[1] = model_config["std"][1].as<float>();
-  std[2] = model_config["std"][2].as<float>();
-  down_sample = model_config["model"]["down_sample"].as<int>();
-  depth_start = model_config["grid_config"]["depth"][0].as<float>();
-  depth_end = model_config["grid_config"]["depth"][1].as<float>();
-  depth_step = model_config["grid_config"]["depth"][2].as<float>();
-  x_start = model_config["grid_config"]["x"][0].as<float>();
-  x_end = model_config["grid_config"]["x"][1].as<float>();
-  x_step = model_config["grid_config"]["x"][2].as<float>();
-  y_start = model_config["grid_config"]["y"][0].as<float>();
-  y_end = model_config["grid_config"]["y"][1].as<float>();
-  y_step = model_config["grid_config"]["y"][2].as<float>();
-  z_start = model_config["grid_config"]["z"][0].as<float>();
-  z_end = model_config["grid_config"]["z"][1].as<float>();
-  z_step = model_config["grid_config"]["z"][2].as<float>();
-  bevpool_channel = model_config["model"]["bevpool_channels"].as<int>();
-  nms_pre_maxnum = model_config["test_cfg"]["max_per_img"].as<int>();
-  nms_post_maxnum = model_config["test_cfg"]["post_max_size"].as<int>();
-  score_thresh = model_config["test_cfg"]["score_threshold"].as<float>();
-  nms_overlap_thresh = model_config["test_cfg"]["nms_thr"][0].as<float>();
-  use_depth = model_config["use_depth"].as<bool>();
-  use_adj = model_config["use_adj"].as<bool>();
-  transform_size = model_config["transform_size"].as<int>();
-  cam_params_size = model_config["cam_params_size"].as<int>();
+  n_img_ = model_config["data_config"]["Ncams"].as<int>();
+  src_img_h_ = model_config["data_config"]["src_size"][0].as<int>();
+  src_img_w_ = model_config["data_config"]["src_size"][1].as<int>();
+  input_img_h_ = model_config["data_config"]["input_size"][0].as<int>();
+  input_img_w_ = model_config["data_config"]["input_size"][1].as<int>();
+  resize_radio_ = model_config["data_config"]["resize_radio"].as<float>();
+  crop_h_ = model_config["data_config"]["crop"][0].as<int>();
+  crop_w_ = model_config["data_config"]["crop"][1].as<int>();
+  mean_[0] = model_config["mean"][0].as<float>();
+  mean_[1] = model_config["mean"][1].as<float>();
+  mean_[2] = model_config["mean"][2].as<float>();
+  std_[0] = model_config["std"][0].as<float>();
+  std_[1] = model_config["std"][1].as<float>();
+  std_[2] = model_config["std"][2].as<float>();
+  down_sample_ = model_config["model"]["down_sample"].as<int>();
+  depth_start_ = model_config["grid_config"]["depth"][0].as<float>();
+  depth_end_ = model_config["grid_config"]["depth"][1].as<float>();
+  depth_step_ = model_config["grid_config"]["depth"][2].as<float>();
+  x_start_ = model_config["grid_config"]["x"][0].as<float>();
+  x_end_ = model_config["grid_config"]["x"][1].as<float>();
+  x_step_ = model_config["grid_config"]["x"][2].as<float>();
+  y_start_ = model_config["grid_config"]["y"][0].as<float>();
+  y_end_ = model_config["grid_config"]["y"][1].as<float>();
+  y_step_ = model_config["grid_config"]["y"][2].as<float>();
+  z_start_ = model_config["grid_config"]["z"][0].as<float>();
+  z_end_ = model_config["grid_config"]["z"][1].as<float>();
+  z_step_ = model_config["grid_config"]["z"][2].as<float>();
+  bevpool_channel_ = model_config["model"]["bevpool_channels"].as<int>();
+  nms_pre_maxnum_ = model_config["test_cfg"]["max_per_img"].as<int>();
+  nms_post_maxnum_ = model_config["test_cfg"]["post_max_size"].as<int>();
+  score_thresh_ = model_config["test_cfg"]["score_threshold"].as<float>();
+  nms_overlap_thresh_ = model_config["test_cfg"]["nms_thr"][0].as<float>();
+  use_depth_ = model_config["use_depth"].as<bool>();
+  use_adj_ = model_config["use_adj"].as<bool>();
+  transform_size_ = model_config["transform_size"].as<int>();
+  cam_params_size_ = model_config["cam_params_size"].as<int>();
 
   std::vector<std::vector<float>> nms_factor_temp =
     model_config["test_cfg"]["nms_rescale_factor"].as<std::vector<std::vector<float>>>();
-  nms_rescale_factor.clear();
+  nms_rescale_factor_.clear();
   for (const auto & task_factors : nms_factor_temp) {
     for (float factor : task_factors) {
-      nms_rescale_factor.push_back(factor);
+      nms_rescale_factor_.push_back(factor);
     }
   }
 
   std::vector<std::vector<std::string>> class_name_pre_task;
-  class_num = 0;
+  class_num_ = 0;
   YAML::Node tasks = model_config["model"]["tasks"];
-  class_num_pre_task = std::vector<int>();
+  class_num_pre_task_ = std::vector<int>();
   for (auto it : tasks) {
     int num = it["num_class"].as<int>();
-    class_num_pre_task.push_back(num);
-    class_num += num;
+    class_num_pre_task_.push_back(num);
+    class_num_ += num;
     class_name_pre_task.push_back(it["class_names"].as<std::vector<std::string>>());
   }
 
   YAML::Node common_head_channel = model_config["model"]["common_head"]["channels"];
   YAML::Node common_head_name = model_config["model"]["common_head"]["names"];
   for (size_t i = 0; i < common_head_channel.size(); i++) {
-    out_num_task_head[common_head_name[i].as<std::string>()] = common_head_channel[i].as<int>();
+    out_num_task_head_[common_head_name[i].as<std::string>()] = common_head_channel[i].as<int>();
   }
 
-  feat_h = input_img_h / down_sample;
-  feat_w = input_img_w / down_sample;
-  depth_num = (depth_end - depth_start) / depth_step;
-  xgrid_num = (x_end - x_start) / x_step;
-  ygrid_num = (y_end - y_start) / y_step;
-  zgrid_num = (z_end - z_start) / z_step;
-  bev_h = ygrid_num;
-  bev_w = xgrid_num;
+  feat_h_ = input_img_h_ / down_sample_;
+  feat_w_ = input_img_w_ / down_sample_;
+  depth_num_ = (depth_end_ - depth_start_) / depth_step_;
+  xgrid_num_ = (x_end_ - x_start_) / x_step_;
+  ygrid_num_ = (y_end_ - y_start_) / y_step_;
+  zgrid_num_ = (z_end_ - z_start_) / z_step_;
+  bev_h_ = ygrid_num_;
+  bev_w_ = xgrid_num_;
 
-  post_rot << resize_radio, 0, 0, 0, resize_radio, 0, 0, 0, 1;
-  post_trans.translation() << -crop_w, -crop_h, 0;
+  post_rot_ << resize_radio_, 0, 0, 0, resize_radio_, 0, 0, 0, 1;
+  post_trans_.translation() << -crop_w_, -crop_h_, 0;
 
-  adj_num = 0;
-  if (use_adj) {
-    adj_num = model_config["adj_num"].as<int>();
+  adj_num_ = 0;
+  if (use_adj_) {
+    adj_num_ = model_config["adj_num"].as<int>();
   }
 
-  postprocess_ptr.reset(new PostprocessGPU(
-    class_num, score_thresh, nms_overlap_thresh, nms_pre_maxnum, nms_post_maxnum, down_sample,
-    bev_h, bev_w, x_step, y_step, x_start, y_start, class_num_pre_task, nms_rescale_factor));
+  postprocess_ptr_.reset(new PostprocessGPU(
+    class_num_, score_thresh_, nms_overlap_thresh_, nms_pre_maxnum_, nms_post_maxnum_, down_sample_,
+    bev_h_, bev_w_, x_step_, y_step_, x_start_, y_start_, class_num_pre_task_, nms_rescale_factor_));
 }
 
 void BEVDet::mallocDeviceMemory()
 {
-  trt_buffer_sizes.resize(trt_engine->getNbBindings());
-  trt_buffer_dev = reinterpret_cast<void **>(new void *[trt_engine->getNbBindings()]);
-  for (int i = 0; i < trt_engine->getNbBindings(); i++) {
-    nvinfer1::Dims32 dim = trt_context->getBindingDimensions(i);
+  trt_buffer_sizes_.resize(trt_engine_->getNbBindings());
+  trt_buffer_dev_ = reinterpret_cast<void **>(new void *[trt_engine_->getNbBindings()]);
+  for (int i = 0; i < trt_engine_->getNbBindings(); i++) {
+    nvinfer1::Dims32 dim = trt_context_->getBindingDimensions(i);
     size_t size = 1;
     for (int j = 0; j < dim.nbDims; j++) {
       size *= dim.d[j];
     }
-    size *= dataTypeToSize(trt_engine->getBindingDataType(i));
-    trt_buffer_sizes[i] = size;
-    CHECK_CUDA(cudaMalloc(&trt_buffer_dev[i], size));
+    size *= dataTypeToSize(trt_engine_->getBindingDataType(i));
+    trt_buffer_sizes_[i] = size;
+    CHECK_CUDA(cudaMalloc(&trt_buffer_dev_[i], size));
   }
 
-  RCLCPP_INFO(rclcpp::get_logger("BEVDet"), "img num binding : %d", trt_engine->getNbBindings());
+  RCLCPP_INFO(rclcpp::get_logger("BEVDet"), "img num binding : %d", trt_engine_->getNbBindings());
 
-  post_buffer = reinterpret_cast<void **>(new void *[class_num_pre_task.size() * 6]);
-  for (size_t i = 0; i < class_num_pre_task.size(); i++) {
-    post_buffer[i * 6 + 0] = trt_buffer_dev[buffer_map["reg_" + std::to_string(i)]];
-    post_buffer[i * 6 + 1] = trt_buffer_dev[buffer_map["height_" + std::to_string(i)]];
-    post_buffer[i * 6 + 2] = trt_buffer_dev[buffer_map["dim_" + std::to_string(i)]];
-    post_buffer[i * 6 + 3] = trt_buffer_dev[buffer_map["rot_" + std::to_string(i)]];
-    post_buffer[i * 6 + 4] = trt_buffer_dev[buffer_map["vel_" + std::to_string(i)]];
-    post_buffer[i * 6 + 5] = trt_buffer_dev[buffer_map["heatmap_" + std::to_string(i)]];
+  post_buffer_ = reinterpret_cast<void **>(new void *[class_num_pre_task_.size() * 6]);
+  for (size_t i = 0; i < class_num_pre_task_.size(); i++) {
+    post_buffer_[i * 6 + 0] = trt_buffer_dev_[buffer_map_["reg_" + std::to_string(i)]];
+    post_buffer_[i * 6 + 1] = trt_buffer_dev_[buffer_map_["height_" + std::to_string(i)]];
+    post_buffer_[i * 6 + 2] = trt_buffer_dev_[buffer_map_["dim_" + std::to_string(i)]];
+    post_buffer_[i * 6 + 3] = trt_buffer_dev_[buffer_map_["rot_" + std::to_string(i)]];
+    post_buffer_[i * 6 + 4] = trt_buffer_dev_[buffer_map_["vel_" + std::to_string(i)]];
+    post_buffer_[i * 6 + 5] = trt_buffer_dev_[buffer_map_["heatmap_" + std::to_string(i)]];
   }
 
   return;
@@ -266,33 +266,33 @@ void BEVDet::initViewTransformer(
   std::shared_ptr<int> & ranks_feat_ptr, std::shared_ptr<int> & interval_starts_ptr,
   std::shared_ptr<int> & interval_lengths_ptr)
 {
-  int num_points = N_img * depth_num * feat_h * feat_w;
+  int num_points = n_img_ * depth_num_ * feat_h_ * feat_w_;
   Eigen::Vector3f * frustum = new Eigen::Vector3f[num_points];
 
-  for (int i = 0; i < N_img; i++) {
-    for (int d_ = 0; d_ < depth_num; d_++) {
-      for (int h_ = 0; h_ < feat_h; h_++) {
-        for (int w_ = 0; w_ < feat_w; w_++) {
-          int offset = i * depth_num * feat_h * feat_w + d_ * feat_h * feat_w + h_ * feat_w + w_;
-          (frustum + offset)->x() = static_cast<float>(w_) * (input_img_w - 1) / (feat_w - 1);
-          (frustum + offset)->y() = static_cast<float>(h_) * (input_img_h - 1) / (feat_h - 1);
-          (frustum + offset)->z() = static_cast<float>(d_) * depth_step + depth_start;
+  for (int i = 0; i < n_img_; i++) {
+    for (int d_ = 0; d_ < depth_num_; d_++) {
+      for (int h_ = 0; h_ < feat_h_; h_++) {
+        for (int w_ = 0; w_ < feat_w_; w_++) {
+          int offset = i * depth_num_ * feat_h_ * feat_w_ + d_ * feat_h_ * feat_w_ + h_ * feat_w_ + w_;
+          (frustum + offset)->x() = static_cast<float>(w_) * (input_img_w_ - 1) / (feat_w_ - 1);
+          (frustum + offset)->y() = static_cast<float>(h_) * (input_img_h_ - 1) / (feat_h_ - 1);
+          (frustum + offset)->z() = static_cast<float>(d_) * depth_step_ + depth_start_;
 
           // eliminate post transformation
-          *(frustum + offset) -= post_trans.translation();
-          *(frustum + offset) = post_rot.inverse() * *(frustum + offset);
+          *(frustum + offset) -= post_trans_.translation();
+          *(frustum + offset) = post_rot_.inverse() * *(frustum + offset);
           //
           (frustum + offset)->x() *= (frustum + offset)->z();
           (frustum + offset)->y() *= (frustum + offset)->z();
           // img to ego -> rot -> trans
-          *(frustum + offset) = cams2ego_rot[i] * cams_intrin[i].inverse() * *(frustum + offset) +
-                                cams2ego_trans[i].translation();
+          *(frustum + offset) = cams2ego_rot_[i] * cams_intrin_[i].inverse() * *(frustum + offset) +
+                                cams2ego_trans_[i].translation();
 
           // voxelization
-          *(frustum + offset) -= Eigen::Vector3f(x_start, y_start, z_start);
-          (frustum + offset)->x() = static_cast<int>((frustum + offset)->x() / x_step);
-          (frustum + offset)->y() = static_cast<int>((frustum + offset)->y() / y_step);
-          (frustum + offset)->z() = static_cast<int>((frustum + offset)->z() / z_step);
+          *(frustum + offset) -= Eigen::Vector3f(x_start_, y_start_, z_start_);
+          (frustum + offset)->x() = static_cast<int>((frustum + offset)->x() / x_step_);
+          (frustum + offset)->y() = static_cast<int>((frustum + offset)->y() / y_step_);
+          (frustum + offset)->z() = static_cast<int>((frustum + offset)->z() / z_step_);
         }
       }
     }
@@ -304,11 +304,11 @@ void BEVDet::initViewTransformer(
   for (int i = 0; i < num_points; i++) {
     _ranks_depth[i] = i;
   }
-  for (int i = 0; i < N_img; i++) {
-    for (int d_ = 0; d_ < depth_num; d_++) {
-      for (int u = 0; u < feat_h * feat_w; u++) {
-        int offset = i * (depth_num * feat_h * feat_w) + d_ * (feat_h * feat_w) + u;
-        _ranks_feat[offset] = i * feat_h * feat_w + u;
+  for (int i = 0; i < n_img_; i++) {
+    for (int d_ = 0; d_ < depth_num_; d_++) {
+      for (int u = 0; u < feat_h_ * feat_w_; u++) {
+        int offset = i * (depth_num_ * feat_h_ * feat_w_) + d_ * (feat_h_ * feat_w_) + u;
+        _ranks_feat[offset] = i * feat_h_ * feat_w_ + u;
       }
     }
   }
@@ -317,30 +317,30 @@ void BEVDet::initViewTransformer(
   for (int i = 0; i < num_points; i++) {
     if (
       static_cast<int>((frustum + i)->x()) >= 0 &&
-      static_cast<int>((frustum + i)->x()) < xgrid_num &&
+      static_cast<int>((frustum + i)->x()) < xgrid_num_ &&
       static_cast<int>((frustum + i)->y()) >= 0 &&
-      static_cast<int>((frustum + i)->y()) < ygrid_num &&
+      static_cast<int>((frustum + i)->y()) < ygrid_num_ &&
       static_cast<int>((frustum + i)->z()) >= 0 &&
-      static_cast<int>((frustum + i)->z()) < zgrid_num) {
+      static_cast<int>((frustum + i)->z()) < zgrid_num_) {
       kept.push_back(i);
     }
   }
 
-  valid_feat_num = kept.size();
-  int * ranks_depth_host = new int[valid_feat_num];
-  int * ranks_feat_host = new int[valid_feat_num];
-  int * ranks_bev_host = new int[valid_feat_num];
-  int * order = new int[valid_feat_num];
+  valid_feat_num_ = kept.size();
+  int * ranks_depth_host = new int[valid_feat_num_];
+  int * ranks_feat_host = new int[valid_feat_num_];
+  int * ranks_bev_host = new int[valid_feat_num_];
+  int * order = new int[valid_feat_num_];
 
-  for (int i = 0; i < valid_feat_num; i++) {
+  for (int i = 0; i < valid_feat_num_; i++) {
     Eigen::Vector3f & p = frustum[kept[i]];
-    ranks_bev_host[i] = static_cast<int>(p.z()) * xgrid_num * ygrid_num +
-                        static_cast<int>(p.y()) * xgrid_num + static_cast<int>(p.x());
+    ranks_bev_host[i] = static_cast<int>(p.z()) * xgrid_num_ * ygrid_num_ +
+                        static_cast<int>(p.y()) * xgrid_num_ + static_cast<int>(p.x());
     order[i] = i;
   }
 
-  thrust::sort_by_key(ranks_bev_host, ranks_bev_host + valid_feat_num, order);
-  for (int i = 0; i < valid_feat_num; i++) {
+  thrust::sort_by_key(ranks_bev_host, ranks_bev_host + valid_feat_num_, order);
+  for (int i = 0; i < valid_feat_num_; i++) {
     ranks_depth_host[i] = _ranks_depth[kept[order[i]]];
     ranks_feat_host[i] = _ranks_feat[kept[order[i]]];
   }
@@ -355,7 +355,7 @@ void BEVDet::initViewTransformer(
 
   interval_starts_host.push_back(0);
   int len = 1;
-  for (int i = 1; i < valid_feat_num; i++) {
+  for (int i = 1; i < valid_feat_num_; i++) {
     if (ranks_bev_host[i] != ranks_bev_host[i - 1]) {
       interval_starts_host.push_back(i);
       interval_lengths_host.push_back(len);
@@ -366,7 +366,7 @@ void BEVDet::initViewTransformer(
   }
 
   interval_lengths_host.push_back(len);
-  unique_bev_num = interval_lengths_host.size();
+  unique_bev_num_ = interval_lengths_host.size();
 
   int * interval_starts_host_ptr = new int[interval_starts_host.size()];
   int * interval_lengths_host_ptr = new int[interval_lengths_host.size()];
@@ -384,8 +384,8 @@ void BEVDet::initViewTransformer(
   interval_starts_ptr.reset(interval_starts_host_ptr);
   interval_lengths_ptr.reset(interval_lengths_host_ptr);
 
-  RCLCPP_INFO(rclcpp::get_logger("BEVDet"), "valid_feat_num: %d", valid_feat_num);
-  RCLCPP_INFO(rclcpp::get_logger("BEVDet"), "unique_bev_num: %d", unique_bev_num);
+  RCLCPP_INFO(rclcpp::get_logger("BEVDet"), "valid_feat_num: %d", valid_feat_num_);
+  RCLCPP_INFO(rclcpp::get_logger("BEVDet"), "unique_bev_num: %d", unique_bev_num_);
 }
 
 void print_dim(nvinfer1::Dims dim, const std::string & name)
@@ -400,45 +400,45 @@ void print_dim(nvinfer1::Dims dim, const std::string & name)
 
 int BEVDet::initEngine(const std::string & engine_file)
 {
-  if (deserializeTRTEngine(engine_file, &trt_engine)) {
+  if (deserializeTRTEngine(engine_file, &trt_engine_)) {
     return EXIT_FAILURE;
   }
 
-  if (trt_engine == nullptr) {
+  if (trt_engine_ == nullptr) {
     RCLCPP_ERROR(rclcpp::get_logger("BEVDet"), "Failed to deserialize engine file!");
     return EXIT_FAILURE;
   }
-  trt_context = trt_engine->createExecutionContext();
+  trt_context_ = trt_engine_->createExecutionContext();
 
-  if (trt_context == nullptr) {
+  if (trt_context_ == nullptr) {
     RCLCPP_ERROR(rclcpp::get_logger("BEVDet"), "Failed to create TensorRT Execution Context!");
     return EXIT_FAILURE;
   }
 
   // set bindings
   std::vector<nvinfer1::Dims32> shapes{
-    {4, {N_img, 3, src_img_h, src_img_w / 4}},
+    {4, {n_img_, 3, src_img_h_, src_img_w_ / 4}},
     {1, {3}},
     {1, {3}},
-    {3, {1, N_img, cam_params_size}},
-    {1, {valid_feat_num}},
-    {1, {valid_feat_num}},
-    {1, {valid_feat_num}},
-    {1, {unique_bev_num}},
-    {1, {unique_bev_num}},
-    {5, {1, adj_num, bevpool_channel, bev_h, bev_w}},
-    {3, {1, adj_num, transform_size}},
+    {3, {1, n_img_, cam_params_size_}},
+    {1, {valid_feat_num_}},
+    {1, {valid_feat_num_}},
+    {1, {valid_feat_num_}},
+    {1, {unique_bev_num_}},
+    {1, {unique_bev_num_}},
+    {5, {1, adj_num_, bevpool_channel_, bev_h_, bev_w_}},
+    {3, {1, adj_num_, transform_size_}},
     {2, {1, 1}}};
 
   for (size_t i = 0; i < shapes.size(); i++) {
-    trt_context->setBindingDimensions(i, shapes[i]);
+    trt_context_->setBindingDimensions(i, shapes[i]);
   }
 
-  buffer_map.clear();
-  for (auto i = 0; i < trt_engine->getNbBindings(); i++) {
-    auto dim = trt_context->getBindingDimensions(i);
-    auto name = trt_engine->getBindingName(i);
-    buffer_map[name] = i;
+  buffer_map_.clear();
+  for (auto i = 0; i < trt_engine_->getNbBindings(); i++) {
+    auto dim = trt_context_->getBindingDimensions(i);
+    auto name = trt_engine_->getBindingName(i);
+    buffer_map_[name] = i;
     print_dim(dim, name);
   }
 
@@ -455,10 +455,10 @@ int BEVDet::deserializeTRTEngine(
   engine_stream << file.rdbuf();
   file.close();
 
-  nvinfer1::IRuntime * runtime = nvinfer1::createInferRuntime(g_logger);
+  nvinfer1::IRuntime * runtime = nvinfer1::createInferRuntime(g_logger_);
   if (runtime == nullptr) {
     std::string msg("Failed to build runtime parser!");
-    g_logger.log(nvinfer1::ILogger::Severity::kERROR, msg.c_str());
+    g_logger_.log(nvinfer1::ILogger::Severity::kERROR, msg.c_str());
     return EXIT_FAILURE;
   }
   engine_stream.seekg(0, std::ios::end);
@@ -471,7 +471,7 @@ int BEVDet::deserializeTRTEngine(
   nvinfer1::ICudaEngine * engine = runtime->deserializeCudaEngine(engine_str, engine_size, NULL);
   if (engine == nullptr) {
     std::string msg("Failed to build engine parser!");
-    g_logger.log(nvinfer1::ILogger::Severity::kERROR, msg.c_str());
+    g_logger_.log(nvinfer1::ILogger::Severity::kERROR, msg.c_str());
     return EXIT_FAILURE;
   }
   *engine_ptr = engine;
@@ -492,31 +492,31 @@ void BEVDet::getAdjBEVFeature(
   const Eigen::Translation3f & ego2global_trans)
 {
   int flag = 1;
-  if (adj_frame_ptr->lastScenesToken() != curr_scene_token) {
-    adj_frame_ptr->reset();
+  if (adj_frame_ptr_->lastScenesToken() != curr_scene_token) {
+    adj_frame_ptr_->reset();
     flag = 0;
   }
 
   // the smaller the idx, the newer th adj_bevfeat
-  for (int i = 0; i < adj_num; i++) {
-    const void * adj_buffer = adj_frame_ptr->getFrameBuffer(i);
+  for (int i = 0; i < adj_num_; i++) {
+    const void * adj_buffer = adj_frame_ptr_->getFrameBuffer(i);
 
-    size_t buf_size = trt_buffer_sizes[buffer_map["adj_feats"]] / adj_num;
+    size_t buf_size = trt_buffer_sizes_[buffer_map_["adj_feats"]] / adj_num_;
 
     CHECK_CUDA(cudaMemcpy(
-      reinterpret_cast<char *>(trt_buffer_dev[buffer_map["adj_feats"]]) + i * buf_size, adj_buffer,
+      reinterpret_cast<char *>(trt_buffer_dev_[buffer_map_["adj_feats"]]) + i * buf_size, adj_buffer,
       buf_size, cudaMemcpyDeviceToDevice));
 
     Eigen::Quaternion<float> adj_ego2global_rot;
     Eigen::Translation3f adj_ego2global_trans;
-    adj_frame_ptr->getEgo2Global(i, adj_ego2global_rot, adj_ego2global_trans);
+    adj_frame_ptr_->getEgo2Global(i, adj_ego2global_rot, adj_ego2global_trans);
 
     getCurr2AdjTransform(
       ego2global_rot, adj_ego2global_rot, ego2global_trans, adj_ego2global_trans,
-      reinterpret_cast<float *>(trt_buffer_dev[buffer_map["transforms"]]) + i * transform_size);
+      reinterpret_cast<float *>(trt_buffer_dev_[buffer_map_["transforms"]]) + i * transform_size_);
   }
   CHECK_CUDA(cudaMemcpy(
-    trt_buffer_dev[buffer_map["flag"]], &flag, trt_buffer_sizes[buffer_map["flag"]],
+    trt_buffer_dev_[buffer_map_["flag"]], &flag, trt_buffer_sizes_[buffer_map_["flag"]],
     cudaMemcpyHostToDevice));
 }
 
@@ -559,10 +559,10 @@ void BEVDet::getCurr2AdjTransform(
   currEgo2adjEgo_2d(1, 2) = currEgo2adjEgo(1, 3);
 
   Eigen::Matrix3f gridbev2egobev;
-  gridbev2egobev(0, 0) = x_step;
-  gridbev2egobev(1, 1) = y_step;
-  gridbev2egobev(0, 2) = x_start;
-  gridbev2egobev(1, 2) = y_start;
+  gridbev2egobev(0, 0) = x_step_;
+  gridbev2egobev(1, 1) = y_step_;
+  gridbev2egobev(0, 2) = x_start_;
+  gridbev2egobev(1, 2) = y_start_;
   gridbev2egobev(2, 2) = 1.f;
 
   gridbev2egobev(0, 1) = 0.f;
@@ -574,7 +574,7 @@ void BEVDet::getCurr2AdjTransform(
 
   CHECK_CUDA(cudaMemcpy(
     transform_dev, Eigen::Matrix3f(currgrid2adjgrid.transpose()).data(),
-    transform_size * sizeof(float), cudaMemcpyHostToDevice));
+    transform_size_ * sizeof(float), cudaMemcpyHostToDevice));
 }
 
 int BEVDet::doInfer(
@@ -584,7 +584,7 @@ int BEVDet::doInfer(
 
   auto start = high_resolution_clock::now();
   CHECK_CUDA(cudaMemcpy(
-    trt_buffer_dev[buffer_map["images"]], cam_data.imgs_dev, trt_buffer_sizes[buffer_map["images"]],
+    trt_buffer_dev_[buffer_map_["images"]], cam_data.imgs_dev, trt_buffer_sizes_[buffer_map_["images"]],
     cudaMemcpyDeviceToDevice));
 
   initCamParams(
@@ -593,17 +593,17 @@ int BEVDet::doInfer(
   getAdjBEVFeature(
     cam_data.param.scene_token, cam_data.param.ego2global_rot, cam_data.param.ego2global_trans);
 
-  if (!trt_context->executeV2(trt_buffer_dev)) {
+  if (!trt_context_->executeV2(trt_buffer_dev_)) {
     RCLCPP_ERROR(rclcpp::get_logger("BEVDet"), "BEVDet forward failing!");
   }
 
-  adj_frame_ptr->saveFrameBuffer(
-    trt_buffer_dev[buffer_map["curr_bevfeat"]], cam_data.param.scene_token,
+  adj_frame_ptr_->saveFrameBuffer(
+    trt_buffer_dev_[buffer_map_["curr_bevfeat"]], cam_data.param.scene_token,
     cam_data.param.ego2global_rot, cam_data.param.ego2global_trans);
 
   auto end = high_resolution_clock::now();
 
-  postprocess_ptr->DoPostprocess(post_buffer, out_detections);
+  postprocess_ptr_->DoPostprocess(post_buffer_, out_detections);
   CHECK_CUDA(cudaDeviceSynchronize());
 
   auto post_end = high_resolution_clock::now();
@@ -639,7 +639,7 @@ void BEVDet::exportEngine(const std::string & onnxFile, const std::string & trtF
     {4, {6, 3, 900, 400}}, {1, {3}},      {1, {3}},     {3, {1, 6, 27}}, {1, {370000}},
     {1, {370000}},         {1, {370000}}, {1, {14000}}, {1, {14000}},    {5, {1, 8, 80, 128, 128}},
     {3, {1, 8, 6}},        {2, {1, 1}}};
-  nvinfer1::IBuilder * builder = nvinfer1::createInferBuilder(g_logger);
+  nvinfer1::IBuilder * builder = nvinfer1::createInferBuilder(g_logger_);
   nvinfer1::INetworkDefinition * network =
     builder->createNetworkV2(1U << int(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH));
   nvinfer1::IOptimizationProfile * profile = builder->createOptimizationProfile();
@@ -648,9 +648,9 @@ void BEVDet::exportEngine(const std::string & onnxFile, const std::string & trtF
 
   config->setFlag(nvinfer1::BuilderFlag::kFP16);
 
-  nvonnxparser::IParser * parser = nvonnxparser::createParser(*network, g_logger);
+  nvonnxparser::IParser * parser = nvonnxparser::createParser(*network, g_logger_);
 
-  if (!parser->parseFromFile(onnxFile.c_str(), static_cast<int>(g_logger.reportable_severity))) {
+  if (!parser->parseFromFile(onnxFile.c_str(), static_cast<int>(g_logger_.reportable_severity))) {
     RCLCPP_ERROR(rclcpp::get_logger("BEVDet"), "Failed parsing .onnx file!");
     for (int i = 0; i < parser->getNbErrors(); ++i) {
       auto * error = parser->getError(i);
@@ -678,7 +678,7 @@ void BEVDet::exportEngine(const std::string & onnxFile, const std::string & trtF
   }
   RCLCPP_INFO(rclcpp::get_logger("BEVDet"), "Succeeded building serialized engine!");
 
-  nvinfer1::IRuntime * runtime{nvinfer1::createInferRuntime(g_logger)};
+  nvinfer1::IRuntime * runtime{nvinfer1::createInferRuntime(g_logger_)};
   engine = runtime->deserializeCudaEngine(engineString->data(), engineString->size());
   if (engine == nullptr) {
     RCLCPP_ERROR(rclcpp::get_logger("BEVDet"), "Failed building engine!");
@@ -701,15 +701,15 @@ void BEVDet::exportEngine(const std::string & onnxFile, const std::string & trtF
 
 BEVDet::~BEVDet()
 {
-  for (int i = 0; i < trt_engine->getNbBindings(); i++) {
-    CHECK_CUDA(cudaFree(trt_buffer_dev[i]));
+  for (int i = 0; i < trt_engine_->getNbBindings(); i++) {
+    CHECK_CUDA(cudaFree(trt_buffer_dev_[i]));
   }
-  delete[] trt_buffer_dev;
-  delete[] post_buffer;
+  delete[] trt_buffer_dev_;
+  delete[] post_buffer_;
 
-  delete[] cam_params_host;
+  delete[] cam_params_host_;
 
-  trt_context->destroy();
-  trt_engine->destroy();
+  trt_context_->destroy();
+  trt_engine_->destroy();
 }
 }  // namespace autoware::tensorrt_bevdet

From 881b457dbe0c1c2b1923802060984f441f45cc60 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 10 Sep 2024 09:47:51 +0000
Subject: [PATCH 68/85] style(pre-commit): autofix

---
 .../autoware_tensorrt_bevdet/src/bevdet.cpp    | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
index 2da18e0402c60..cf17db8b4a839 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
@@ -83,8 +83,8 @@ BEVDet::BEVDet(
     trt_buffer_dev_[buffer_map_["ranks_bev"]], ranks_bev_ptr.get(), valid_feat_num_ * sizeof(int),
     cudaMemcpyHostToDevice));
   CHECK_CUDA(cudaMemcpy(
-    trt_buffer_dev_[buffer_map_["ranks_depth"]], ranks_depth_ptr.get(), valid_feat_num_ * sizeof(int),
-    cudaMemcpyHostToDevice));
+    trt_buffer_dev_[buffer_map_["ranks_depth"]], ranks_depth_ptr.get(),
+    valid_feat_num_ * sizeof(int), cudaMemcpyHostToDevice));
   CHECK_CUDA(cudaMemcpy(
     trt_buffer_dev_[buffer_map_["ranks_feat"]], ranks_feat_ptr.get(), valid_feat_num_ * sizeof(int),
     cudaMemcpyHostToDevice));
@@ -228,7 +228,8 @@ void BEVDet::initParams(const std::string & config_file)
 
   postprocess_ptr_.reset(new PostprocessGPU(
     class_num_, score_thresh_, nms_overlap_thresh_, nms_pre_maxnum_, nms_post_maxnum_, down_sample_,
-    bev_h_, bev_w_, x_step_, y_step_, x_start_, y_start_, class_num_pre_task_, nms_rescale_factor_));
+    bev_h_, bev_w_, x_step_, y_step_, x_start_, y_start_, class_num_pre_task_,
+    nms_rescale_factor_));
 }
 
 void BEVDet::mallocDeviceMemory()
@@ -273,7 +274,8 @@ void BEVDet::initViewTransformer(
     for (int d_ = 0; d_ < depth_num_; d_++) {
       for (int h_ = 0; h_ < feat_h_; h_++) {
         for (int w_ = 0; w_ < feat_w_; w_++) {
-          int offset = i * depth_num_ * feat_h_ * feat_w_ + d_ * feat_h_ * feat_w_ + h_ * feat_w_ + w_;
+          int offset =
+            i * depth_num_ * feat_h_ * feat_w_ + d_ * feat_h_ * feat_w_ + h_ * feat_w_ + w_;
           (frustum + offset)->x() = static_cast<float>(w_) * (input_img_w_ - 1) / (feat_w_ - 1);
           (frustum + offset)->y() = static_cast<float>(h_) * (input_img_h_ - 1) / (feat_h_ - 1);
           (frustum + offset)->z() = static_cast<float>(d_) * depth_step_ + depth_start_;
@@ -504,8 +506,8 @@ void BEVDet::getAdjBEVFeature(
     size_t buf_size = trt_buffer_sizes_[buffer_map_["adj_feats"]] / adj_num_;
 
     CHECK_CUDA(cudaMemcpy(
-      reinterpret_cast<char *>(trt_buffer_dev_[buffer_map_["adj_feats"]]) + i * buf_size, adj_buffer,
-      buf_size, cudaMemcpyDeviceToDevice));
+      reinterpret_cast<char *>(trt_buffer_dev_[buffer_map_["adj_feats"]]) + i * buf_size,
+      adj_buffer, buf_size, cudaMemcpyDeviceToDevice));
 
     Eigen::Quaternion<float> adj_ego2global_rot;
     Eigen::Translation3f adj_ego2global_trans;
@@ -584,8 +586,8 @@ int BEVDet::doInfer(
 
   auto start = high_resolution_clock::now();
   CHECK_CUDA(cudaMemcpy(
-    trt_buffer_dev_[buffer_map_["images"]], cam_data.imgs_dev, trt_buffer_sizes_[buffer_map_["images"]],
-    cudaMemcpyDeviceToDevice));
+    trt_buffer_dev_[buffer_map_["images"]], cam_data.imgs_dev,
+    trt_buffer_sizes_[buffer_map_["images"]], cudaMemcpyDeviceToDevice));
 
   initCamParams(
     cam_data.param.cams2ego_rot, cam_data.param.cams2ego_trans, cam_data.param.cams_intrin);

From cea3f9ee3c787d653f8c8ec512145d7de9ef3c83 Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Tue, 10 Sep 2024 20:05:24 +0800
Subject: [PATCH 69/85] fix: add how to use in readme.md

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 perception/autoware_tensorrt_bevdet/README.md | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/perception/autoware_tensorrt_bevdet/README.md b/perception/autoware_tensorrt_bevdet/README.md
index 30c5295ff3eb9..a038ef5db1d1f 100644
--- a/perception/autoware_tensorrt_bevdet/README.md
+++ b/perception/autoware_tensorrt_bevdet/README.md
@@ -33,6 +33,19 @@ BEVDet is a BEV perception algorithm based on panoramic cameras. It unifies mult
 | ---------------- | ------------------------------------------------ | ---------------- |
 | `~/output/boxes` | `autoware_perception_msgs::msg::DetectedObjects` | detected objects |
 
+## How to Use Tensorrt BEVDet Node
+
+1. launch `tensorrt_bevdet_node`
+
+```bash
+
+ros2 launch autoware_tensorrt_bevdet tensorrt_bevdet_node.launch.xml
+```
+
+2. play nuScenes data
+
+please refer to open source repository [ros2_dataset_bridge](https://github.com/Owen-Liuyuxuan/ros2_dataset_bridge) to publish the ROS 2 topics.
+
 ## Limitation
 
 The model is trained on open-source dataset `NuScenes` and has poor generalization on its own dataset, If you want to use this model to infer your data, you need to retrain it.

From 8a07984d2535dd22d6b5502d9224ae3e295dbbf3 Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Wed, 18 Sep 2024 11:57:47 +0800
Subject: [PATCH 70/85] fix: replace ptr with std::vector

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 .../autoware_tensorrt_bevdet/src/bevdet.cpp   | 91 +++++++++----------
 1 file changed, 41 insertions(+), 50 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
index cf17db8b4a839..8da43efe45280 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
@@ -268,7 +268,7 @@ void BEVDet::initViewTransformer(
   std::shared_ptr<int> & interval_lengths_ptr)
 {
   int num_points = n_img_ * depth_num_ * feat_h_ * feat_w_;
-  Eigen::Vector3f * frustum = new Eigen::Vector3f[num_points];
+  std::vector<Eigen::Vector3f> frustum(num_points);
 
   for (int i = 0; i < n_img_; i++) {
     for (int d_ = 0; d_ < depth_num_; d_++) {
@@ -276,41 +276,41 @@ void BEVDet::initViewTransformer(
         for (int w_ = 0; w_ < feat_w_; w_++) {
           int offset =
             i * depth_num_ * feat_h_ * feat_w_ + d_ * feat_h_ * feat_w_ + h_ * feat_w_ + w_;
-          (frustum + offset)->x() = static_cast<float>(w_) * (input_img_w_ - 1) / (feat_w_ - 1);
-          (frustum + offset)->y() = static_cast<float>(h_) * (input_img_h_ - 1) / (feat_h_ - 1);
-          (frustum + offset)->z() = static_cast<float>(d_) * depth_step_ + depth_start_;
+          frustum[offset].x() = static_cast<float>(w_) * (input_img_w_ - 1) / (feat_w_ - 1);
+          frustum[offset].y() = static_cast<float>(h_) * (input_img_h_ - 1) / (feat_h_ - 1);
+          frustum[offset].z() = static_cast<float>(d_) * depth_step_ + depth_start_;
 
           // eliminate post transformation
-          *(frustum + offset) -= post_trans_.translation();
-          *(frustum + offset) = post_rot_.inverse() * *(frustum + offset);
+          frustum[offset] -= post_trans_.translation();
+          frustum[offset] = post_rot_.inverse() * frustum[offset];
           //
-          (frustum + offset)->x() *= (frustum + offset)->z();
-          (frustum + offset)->y() *= (frustum + offset)->z();
+          frustum[offset].x() *= frustum[offset].z();
+          frustum[offset].y() *= frustum[offset].z();
           // img to ego -> rot -> trans
-          *(frustum + offset) = cams2ego_rot_[i] * cams_intrin_[i].inverse() * *(frustum + offset) +
+          frustum[offset] = cams2ego_rot_[i] * cams_intrin_[i].inverse() * frustum[offset] +
                                 cams2ego_trans_[i].translation();
 
           // voxelization
-          *(frustum + offset) -= Eigen::Vector3f(x_start_, y_start_, z_start_);
-          (frustum + offset)->x() = static_cast<int>((frustum + offset)->x() / x_step_);
-          (frustum + offset)->y() = static_cast<int>((frustum + offset)->y() / y_step_);
-          (frustum + offset)->z() = static_cast<int>((frustum + offset)->z() / z_step_);
+          frustum[offset] -= Eigen::Vector3f(x_start_, y_start_, z_start_);
+          frustum[offset].x() = static_cast<int>(frustum[offset].x() / x_step_);
+          frustum[offset].y() = static_cast<int>(frustum[offset].y() / y_step_);
+          frustum[offset].z() = static_cast<int>(frustum[offset].z() / z_step_);
         }
       }
     }
   }
 
-  int * _ranks_depth = new int[num_points];
-  int * _ranks_feat = new int[num_points];
+  std::vector<int> ranks_depth(num_points);
+  std::vector<int> ranks_feat(num_points);
 
   for (int i = 0; i < num_points; i++) {
-    _ranks_depth[i] = i;
+    ranks_depth[i] = i;
   }
   for (int i = 0; i < n_img_; i++) {
     for (int d_ = 0; d_ < depth_num_; d_++) {
       for (int u = 0; u < feat_h_ * feat_w_; u++) {
         int offset = i * (depth_num_ * feat_h_ * feat_w_) + d_ * (feat_h_ * feat_w_) + u;
-        _ranks_feat[offset] = i * feat_h_ * feat_w_ + u;
+        ranks_feat[offset] = i * feat_h_ * feat_w_ + u;
       }
     }
   }
@@ -318,21 +318,21 @@ void BEVDet::initViewTransformer(
   std::vector<int> kept;
   for (int i = 0; i < num_points; i++) {
     if (
-      static_cast<int>((frustum + i)->x()) >= 0 &&
-      static_cast<int>((frustum + i)->x()) < xgrid_num_ &&
-      static_cast<int>((frustum + i)->y()) >= 0 &&
-      static_cast<int>((frustum + i)->y()) < ygrid_num_ &&
-      static_cast<int>((frustum + i)->z()) >= 0 &&
-      static_cast<int>((frustum + i)->z()) < zgrid_num_) {
+      static_cast<int>(frustum[i].x()) >= 0 &&
+      static_cast<int>(frustum[i].x()) < xgrid_num_ &&
+      static_cast<int>(frustum[i].y()) >= 0 &&
+      static_cast<int>(frustum[i].y()) < ygrid_num_ &&
+      static_cast<int>(frustum[i].z()) >= 0 &&
+      static_cast<int>(frustum[i].z()) < zgrid_num_) {
       kept.push_back(i);
     }
   }
 
   valid_feat_num_ = kept.size();
-  int * ranks_depth_host = new int[valid_feat_num_];
-  int * ranks_feat_host = new int[valid_feat_num_];
-  int * ranks_bev_host = new int[valid_feat_num_];
-  int * order = new int[valid_feat_num_];
+  std::vector<int> ranks_depth_host(valid_feat_num_);
+  std::vector<int> ranks_feat_host(valid_feat_num_);
+  std::vector<int> ranks_bev_host(valid_feat_num_);
+  std::vector<int> order(valid_feat_num_);
 
   for (int i = 0; i < valid_feat_num_; i++) {
     Eigen::Vector3f & p = frustum[kept[i]];
@@ -341,17 +341,12 @@ void BEVDet::initViewTransformer(
     order[i] = i;
   }
 
-  thrust::sort_by_key(ranks_bev_host, ranks_bev_host + valid_feat_num_, order);
+  thrust::sort_by_key(ranks_bev_host.begin(), ranks_bev_host.end(), order.begin());
   for (int i = 0; i < valid_feat_num_; i++) {
-    ranks_depth_host[i] = _ranks_depth[kept[order[i]]];
-    ranks_feat_host[i] = _ranks_feat[kept[order[i]]];
+    ranks_depth_host[i] = ranks_depth[kept[order[i]]];
+    ranks_feat_host[i] = ranks_feat[kept[order[i]]];
   }
 
-  delete[] _ranks_depth;
-  delete[] _ranks_feat;
-  delete[] frustum;
-  delete[] order;
-
   std::vector<int> interval_starts_host;
   std::vector<int> interval_lengths_host;
 
@@ -370,21 +365,17 @@ void BEVDet::initViewTransformer(
   interval_lengths_host.push_back(len);
   unique_bev_num_ = interval_lengths_host.size();
 
-  int * interval_starts_host_ptr = new int[interval_starts_host.size()];
-  int * interval_lengths_host_ptr = new int[interval_lengths_host.size()];
-
-  memcpy(
-    interval_starts_host_ptr, interval_starts_host.data(),
-    interval_starts_host.size() * sizeof(int));
-  memcpy(
-    interval_lengths_host_ptr, interval_lengths_host.data(),
-    interval_lengths_host.size() * sizeof(int));
-
-  ranks_bev_ptr.reset(ranks_bev_host);
-  ranks_depth_ptr.reset(ranks_depth_host);
-  ranks_feat_ptr.reset(ranks_feat_host);
-  interval_starts_ptr.reset(interval_starts_host_ptr);
-  interval_lengths_ptr.reset(interval_lengths_host_ptr);
+  ranks_bev_ptr.reset(new int[valid_feat_num_], std::default_delete<int[]>());
+  ranks_depth_ptr.reset(new int[valid_feat_num_], std::default_delete<int[]>());
+  ranks_feat_ptr.reset(new int[valid_feat_num_], std::default_delete<int[]>());
+  interval_starts_ptr.reset(new int[interval_starts_host.size()], std::default_delete<int[]>());
+  interval_lengths_ptr.reset(new int[interval_lengths_host.size()], std::default_delete<int[]>());
+
+  std::copy(ranks_bev_host.begin(), ranks_bev_host.end(), ranks_bev_ptr.get());
+  std::copy(ranks_depth_host.begin(), ranks_depth_host.end(), ranks_depth_ptr.get());
+  std::copy(ranks_feat_host.begin(), ranks_feat_host.end(), ranks_feat_ptr.get());
+  std::copy(interval_starts_host.begin(), interval_starts_host.end(), interval_starts_ptr.get());
+  std::copy(interval_lengths_host.begin(), interval_lengths_host.end(), interval_lengths_ptr.get());
 
   RCLCPP_INFO(rclcpp::get_logger("BEVDet"), "valid_feat_num: %d", valid_feat_num_);
   RCLCPP_INFO(rclcpp::get_logger("BEVDet"), "unique_bev_num: %d", unique_bev_num_);

From b6b1f91ba391f81421ba7688ddc45e4769a2abfe Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 18 Sep 2024 03:59:59 +0000
Subject: [PATCH 71/85] style(pre-commit): autofix

---
 perception/autoware_tensorrt_bevdet/src/bevdet.cpp | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
index 8da43efe45280..b7e8669c1d798 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
@@ -288,7 +288,7 @@ void BEVDet::initViewTransformer(
           frustum[offset].y() *= frustum[offset].z();
           // img to ego -> rot -> trans
           frustum[offset] = cams2ego_rot_[i] * cams_intrin_[i].inverse() * frustum[offset] +
-                                cams2ego_trans_[i].translation();
+                            cams2ego_trans_[i].translation();
 
           // voxelization
           frustum[offset] -= Eigen::Vector3f(x_start_, y_start_, z_start_);
@@ -318,12 +318,9 @@ void BEVDet::initViewTransformer(
   std::vector<int> kept;
   for (int i = 0; i < num_points; i++) {
     if (
-      static_cast<int>(frustum[i].x()) >= 0 &&
-      static_cast<int>(frustum[i].x()) < xgrid_num_ &&
-      static_cast<int>(frustum[i].y()) >= 0 &&
-      static_cast<int>(frustum[i].y()) < ygrid_num_ &&
-      static_cast<int>(frustum[i].z()) >= 0 &&
-      static_cast<int>(frustum[i].z()) < zgrid_num_) {
+      static_cast<int>(frustum[i].x()) >= 0 && static_cast<int>(frustum[i].x()) < xgrid_num_ &&
+      static_cast<int>(frustum[i].y()) >= 0 && static_cast<int>(frustum[i].y()) < ygrid_num_ &&
+      static_cast<int>(frustum[i].z()) >= 0 && static_cast<int>(frustum[i].z()) < zgrid_num_) {
       kept.push_back(i);
     }
   }

From 7dc00d79191f85c77c30c30030172cd1a5aac0f0 Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Wed, 18 Sep 2024 13:18:11 +0800
Subject: [PATCH 72/85] trying fix pre-commit.ci error

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 perception/autoware_tensorrt_bevdet/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/README.md b/perception/autoware_tensorrt_bevdet/README.md
index a038ef5db1d1f..2a768cdd5fd57 100644
--- a/perception/autoware_tensorrt_bevdet/README.md
+++ b/perception/autoware_tensorrt_bevdet/README.md
@@ -42,9 +42,9 @@ BEVDet is a BEV perception algorithm based on panoramic cameras. It unifies mult
 ros2 launch autoware_tensorrt_bevdet tensorrt_bevdet_node.launch.xml
 ```
 
-2. play nuScenes data
+2. play ros2 bag of nuScenes data
 
-please refer to open source repository [ros2_dataset_bridge](https://github.com/Owen-Liuyuxuan/ros2_dataset_bridge) to publish the ROS 2 topics.
+Please refer to open source repository [ros2_dataset_bridge](https://github.com/Owen-Liuyuxuan/ros2_dataset_bridge) to publish the ROS 2 topics.
 
 ## Limitation
 

From dbc0337683acfc1a58b0c09b3cb1810fe7a1480b Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Wed, 18 Sep 2024 13:27:12 +0800
Subject: [PATCH 73/85] trying fix pre-commit.ci error

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 perception/autoware_tensorrt_bevdet/README.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/README.md b/perception/autoware_tensorrt_bevdet/README.md
index 2a768cdd5fd57..66a0c47e8bc2f 100644
--- a/perception/autoware_tensorrt_bevdet/README.md
+++ b/perception/autoware_tensorrt_bevdet/README.md
@@ -37,14 +37,14 @@ BEVDet is a BEV perception algorithm based on panoramic cameras. It unifies mult
 
 1. launch `tensorrt_bevdet_node`
 
-```bash
+    ```bash
 
-ros2 launch autoware_tensorrt_bevdet tensorrt_bevdet_node.launch.xml
-```
+    ros2 launch autoware_tensorrt_bevdet tensorrt_bevdet_node.launch.xml
+    ```
 
 2. play ros2 bag of nuScenes data
 
-Please refer to open source repository [ros2_dataset_bridge](https://github.com/Owen-Liuyuxuan/ros2_dataset_bridge) to publish the ROS 2 topics.
+    Please refer to open source repository [ros2_dataset_bridge](https://github.com/Owen-Liuyuxuan/ros2_dataset_bridge) to publish the ROS 2 topics.
 
 ## Limitation
 

From dc6fd9b5882fb97b5c856ed6554b43b9d1552d0c Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 18 Sep 2024 05:30:08 +0000
Subject: [PATCH 74/85] style(pre-commit): autofix

---
 perception/autoware_tensorrt_bevdet/README.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/README.md b/perception/autoware_tensorrt_bevdet/README.md
index 66a0c47e8bc2f..ce6b1cc02c2bb 100644
--- a/perception/autoware_tensorrt_bevdet/README.md
+++ b/perception/autoware_tensorrt_bevdet/README.md
@@ -37,14 +37,14 @@ BEVDet is a BEV perception algorithm based on panoramic cameras. It unifies mult
 
 1. launch `tensorrt_bevdet_node`
 
-    ```bash
+   ```bash
 
-    ros2 launch autoware_tensorrt_bevdet tensorrt_bevdet_node.launch.xml
-    ```
+   ros2 launch autoware_tensorrt_bevdet tensorrt_bevdet_node.launch.xml
+   ```
 
 2. play ros2 bag of nuScenes data
 
-    Please refer to open source repository [ros2_dataset_bridge](https://github.com/Owen-Liuyuxuan/ros2_dataset_bridge) to publish the ROS 2 topics.
+   Please refer to open source repository [ros2_dataset_bridge](https://github.com/Owen-Liuyuxuan/ros2_dataset_bridge) to publish the ROS 2 topics.
 
 ## Limitation
 

From 13b4c5b448888ac38242c69370c344e78e00295f Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Mon, 23 Sep 2024 19:56:37 +0800
Subject: [PATCH 75/85] replace shared_ptr with std::vector

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 .../autoware/tensorrt_bevdet/bevdet.hpp       |  8 +-
 .../autoware_tensorrt_bevdet/src/bevdet.cpp   | 85 ++++++++-----------
 2 files changed, 38 insertions(+), 55 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp
index 4cc4c43f1cbc7..a8d84f637432b 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp
@@ -130,9 +130,9 @@ class BEVDet
   void initParams(const std::string & config_file);
 
   void initViewTransformer(
-    std::shared_ptr<int> & ranks_bev_ptr, std::shared_ptr<int> & ranks_depth_ptr,
-    std::shared_ptr<int> & ranks_feat_ptr, std::shared_ptr<int> & interval_starts_ptr,
-    std::shared_ptr<int> & interval_lengths_ptr);
+    std::vector<int> & ranks_bev, std::vector<int> & ranks_depth,
+    std::vector<int> & ranks_feat, std::vector<int> & interval_starts,
+    std::vector<int> & interval_lengths);
   void exportEngine(const std::string & onnxFile, const std::string & trtFile);
   int initEngine(const std::string & engine_file);
 
@@ -217,7 +217,7 @@ class BEVDet
 
   std::vector<size_t> trt_buffer_sizes_;
   void ** trt_buffer_dev_;
-  float * cam_params_host_;
+  std::vector<float> cam_params_host_;
   void ** post_buffer_;
 
   std::map<std::string, int> buffer_map_;
diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
index b7e8669c1d798..93d098dd28f37 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
@@ -48,14 +48,14 @@ BEVDet::BEVDet(
   }
   auto start = high_resolution_clock::now();
 
-  std::shared_ptr<int> ranks_bev_ptr = nullptr;
-  std::shared_ptr<int> ranks_depth_ptr = nullptr;
-  std::shared_ptr<int> ranks_feat_ptr = nullptr;
-  std::shared_ptr<int> interval_starts_ptr = nullptr;
-  std::shared_ptr<int> interval_lengths_ptr = nullptr;
+  std::vector<int> ranks_bev;
+  std::vector<int> ranks_depth;
+  std::vector<int> ranks_feat;
+  std::vector<int> interval_starts;
+  std::vector<int> interval_lengths;
 
   initViewTransformer(
-    ranks_bev_ptr, ranks_depth_ptr, ranks_feat_ptr, interval_starts_ptr, interval_lengths_ptr);
+    ranks_bev, ranks_depth, ranks_feat, interval_starts, interval_lengths);
   auto end = high_resolution_clock::now();
   duration<float> t = end - start;
   RCLCPP_INFO(
@@ -77,22 +77,22 @@ BEVDet::BEVDet(
     adj_frame_ptr_.reset(new AdjFrame(adj_num_, trt_buffer_sizes_[buffer_map_["curr_bevfeat"]]));
   }
 
-  cam_params_host_ = new float[n_img_ * cam_params_size_];
+  cam_params_host_.resize(n_img_ * cam_params_size_);
 
   CHECK_CUDA(cudaMemcpy(
-    trt_buffer_dev_[buffer_map_["ranks_bev"]], ranks_bev_ptr.get(), valid_feat_num_ * sizeof(int),
+    trt_buffer_dev_[buffer_map_["ranks_bev"]], ranks_bev.data(), valid_feat_num_ * sizeof(int),
     cudaMemcpyHostToDevice));
   CHECK_CUDA(cudaMemcpy(
-    trt_buffer_dev_[buffer_map_["ranks_depth"]], ranks_depth_ptr.get(),
+    trt_buffer_dev_[buffer_map_["ranks_depth"]], ranks_depth.data(),
     valid_feat_num_ * sizeof(int), cudaMemcpyHostToDevice));
   CHECK_CUDA(cudaMemcpy(
-    trt_buffer_dev_[buffer_map_["ranks_feat"]], ranks_feat_ptr.get(), valid_feat_num_ * sizeof(int),
+    trt_buffer_dev_[buffer_map_["ranks_feat"]], ranks_feat.data(), valid_feat_num_ * sizeof(int),
     cudaMemcpyHostToDevice));
   CHECK_CUDA(cudaMemcpy(
-    trt_buffer_dev_[buffer_map_["interval_starts"]], interval_starts_ptr.get(),
+    trt_buffer_dev_[buffer_map_["interval_starts"]], interval_starts.data(),
     unique_bev_num_ * sizeof(int), cudaMemcpyHostToDevice));
   CHECK_CUDA(cudaMemcpy(
-    trt_buffer_dev_[buffer_map_["interval_lengths"]], interval_lengths_ptr.get(),
+    trt_buffer_dev_[buffer_map_["interval_lengths"]], interval_lengths.data(),
     unique_bev_num_ * sizeof(int), cudaMemcpyHostToDevice));
 
   CHECK_CUDA(cudaMemcpy(
@@ -136,7 +136,7 @@ void BEVDet::initCamParams(
     cam_params_host_[i * cam_params_size_ + 26] = curr_cams2ego_trans[i].translation()(2);
   }
   CHECK_CUDA(cudaMemcpy(
-    trt_buffer_dev_[buffer_map_["cam_params"]], cam_params_host_,
+    trt_buffer_dev_[buffer_map_["cam_params"]], cam_params_host_.data(),
     trt_buffer_sizes_[buffer_map_["cam_params"]], cudaMemcpyHostToDevice));
 }
 
@@ -263,9 +263,9 @@ void BEVDet::mallocDeviceMemory()
 }
 
 void BEVDet::initViewTransformer(
-  std::shared_ptr<int> & ranks_bev_ptr, std::shared_ptr<int> & ranks_depth_ptr,
-  std::shared_ptr<int> & ranks_feat_ptr, std::shared_ptr<int> & interval_starts_ptr,
-  std::shared_ptr<int> & interval_lengths_ptr)
+  std::vector<int> & ranks_bev, std::vector<int> & ranks_depth,
+  std::vector<int> & ranks_feat, std::vector<int> & interval_starts,
+  std::vector<int> & interval_lengths)
 {
   int num_points = n_img_ * depth_num_ * feat_h_ * feat_w_;
   std::vector<Eigen::Vector3f> frustum(num_points);
@@ -300,17 +300,17 @@ void BEVDet::initViewTransformer(
     }
   }
 
-  std::vector<int> ranks_depth(num_points);
-  std::vector<int> ranks_feat(num_points);
+  std::vector<int> _ranks_depth(num_points);
+  std::vector<int> _ranks_feat(num_points);
 
   for (int i = 0; i < num_points; i++) {
-    ranks_depth[i] = i;
+    _ranks_depth[i] = i;
   }
   for (int i = 0; i < n_img_; i++) {
     for (int d_ = 0; d_ < depth_num_; d_++) {
       for (int u = 0; u < feat_h_ * feat_w_; u++) {
         int offset = i * (depth_num_ * feat_h_ * feat_w_) + d_ * (feat_h_ * feat_w_) + u;
-        ranks_feat[offset] = i * feat_h_ * feat_w_ + u;
+        _ranks_feat[offset] = i * feat_h_ * feat_w_ + u;
       }
     }
   }
@@ -325,54 +325,39 @@ void BEVDet::initViewTransformer(
     }
   }
 
-  valid_feat_num_ = kept.size();
-  std::vector<int> ranks_depth_host(valid_feat_num_);
-  std::vector<int> ranks_feat_host(valid_feat_num_);
-  std::vector<int> ranks_bev_host(valid_feat_num_);
+  valid_feat_num_ = kept.size();;
+  ranks_depth.resize(valid_feat_num_);
+  ranks_feat.resize(valid_feat_num_);
+  ranks_bev.resize(valid_feat_num_);
   std::vector<int> order(valid_feat_num_);
 
   for (int i = 0; i < valid_feat_num_; i++) {
     Eigen::Vector3f & p = frustum[kept[i]];
-    ranks_bev_host[i] = static_cast<int>(p.z()) * xgrid_num_ * ygrid_num_ +
+    ranks_bev[i] = static_cast<int>(p.z()) * xgrid_num_ * ygrid_num_ +
                         static_cast<int>(p.y()) * xgrid_num_ + static_cast<int>(p.x());
     order[i] = i;
   }
 
-  thrust::sort_by_key(ranks_bev_host.begin(), ranks_bev_host.end(), order.begin());
+  thrust::sort_by_key(ranks_bev.begin(), ranks_bev.end(), order.begin());
   for (int i = 0; i < valid_feat_num_; i++) {
-    ranks_depth_host[i] = ranks_depth[kept[order[i]]];
-    ranks_feat_host[i] = ranks_feat[kept[order[i]]];
+    ranks_depth[i] = _ranks_depth[kept[order[i]]];
+    ranks_feat[i] = _ranks_feat[kept[order[i]]];
   }
 
-  std::vector<int> interval_starts_host;
-  std::vector<int> interval_lengths_host;
-
-  interval_starts_host.push_back(0);
+  interval_starts.push_back(0);
   int len = 1;
   for (int i = 1; i < valid_feat_num_; i++) {
-    if (ranks_bev_host[i] != ranks_bev_host[i - 1]) {
-      interval_starts_host.push_back(i);
-      interval_lengths_host.push_back(len);
+    if (ranks_bev[i] != ranks_bev[i - 1]) {
+      interval_starts.push_back(i);
+      interval_lengths.push_back(len);
       len = 1;
     } else {
       len++;
     }
   }
 
-  interval_lengths_host.push_back(len);
-  unique_bev_num_ = interval_lengths_host.size();
-
-  ranks_bev_ptr.reset(new int[valid_feat_num_], std::default_delete<int[]>());
-  ranks_depth_ptr.reset(new int[valid_feat_num_], std::default_delete<int[]>());
-  ranks_feat_ptr.reset(new int[valid_feat_num_], std::default_delete<int[]>());
-  interval_starts_ptr.reset(new int[interval_starts_host.size()], std::default_delete<int[]>());
-  interval_lengths_ptr.reset(new int[interval_lengths_host.size()], std::default_delete<int[]>());
-
-  std::copy(ranks_bev_host.begin(), ranks_bev_host.end(), ranks_bev_ptr.get());
-  std::copy(ranks_depth_host.begin(), ranks_depth_host.end(), ranks_depth_ptr.get());
-  std::copy(ranks_feat_host.begin(), ranks_feat_host.end(), ranks_feat_ptr.get());
-  std::copy(interval_starts_host.begin(), interval_starts_host.end(), interval_starts_ptr.get());
-  std::copy(interval_lengths_host.begin(), interval_lengths_host.end(), interval_lengths_ptr.get());
+  interval_lengths.push_back(len);
+  unique_bev_num_ = interval_lengths.size();
 
   RCLCPP_INFO(rclcpp::get_logger("BEVDet"), "valid_feat_num: %d", valid_feat_num_);
   RCLCPP_INFO(rclcpp::get_logger("BEVDet"), "unique_bev_num: %d", unique_bev_num_);
@@ -697,8 +682,6 @@ BEVDet::~BEVDet()
   delete[] trt_buffer_dev_;
   delete[] post_buffer_;
 
-  delete[] cam_params_host_;
-
   trt_context_->destroy();
   trt_engine_->destroy();
 }

From 93556def45f11da4071cd195996b2aebdcfe2860 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 23 Sep 2024 11:59:05 +0000
Subject: [PATCH 76/85] style(pre-commit): autofix

---
 .../include/autoware/tensorrt_bevdet/bevdet.hpp |  5 ++---
 .../autoware_tensorrt_bevdet/src/bevdet.cpp     | 17 ++++++++---------
 2 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp
index a8d84f637432b..8c6241ba4e647 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp
@@ -130,9 +130,8 @@ class BEVDet
   void initParams(const std::string & config_file);
 
   void initViewTransformer(
-    std::vector<int> & ranks_bev, std::vector<int> & ranks_depth,
-    std::vector<int> & ranks_feat, std::vector<int> & interval_starts,
-    std::vector<int> & interval_lengths);
+    std::vector<int> & ranks_bev, std::vector<int> & ranks_depth, std::vector<int> & ranks_feat,
+    std::vector<int> & interval_starts, std::vector<int> & interval_lengths);
   void exportEngine(const std::string & onnxFile, const std::string & trtFile);
   int initEngine(const std::string & engine_file);
 
diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
index 93d098dd28f37..0296a829f1e35 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
@@ -54,8 +54,7 @@ BEVDet::BEVDet(
   std::vector<int> interval_starts;
   std::vector<int> interval_lengths;
 
-  initViewTransformer(
-    ranks_bev, ranks_depth, ranks_feat, interval_starts, interval_lengths);
+  initViewTransformer(ranks_bev, ranks_depth, ranks_feat, interval_starts, interval_lengths);
   auto end = high_resolution_clock::now();
   duration<float> t = end - start;
   RCLCPP_INFO(
@@ -83,8 +82,8 @@ BEVDet::BEVDet(
     trt_buffer_dev_[buffer_map_["ranks_bev"]], ranks_bev.data(), valid_feat_num_ * sizeof(int),
     cudaMemcpyHostToDevice));
   CHECK_CUDA(cudaMemcpy(
-    trt_buffer_dev_[buffer_map_["ranks_depth"]], ranks_depth.data(),
-    valid_feat_num_ * sizeof(int), cudaMemcpyHostToDevice));
+    trt_buffer_dev_[buffer_map_["ranks_depth"]], ranks_depth.data(), valid_feat_num_ * sizeof(int),
+    cudaMemcpyHostToDevice));
   CHECK_CUDA(cudaMemcpy(
     trt_buffer_dev_[buffer_map_["ranks_feat"]], ranks_feat.data(), valid_feat_num_ * sizeof(int),
     cudaMemcpyHostToDevice));
@@ -263,9 +262,8 @@ void BEVDet::mallocDeviceMemory()
 }
 
 void BEVDet::initViewTransformer(
-  std::vector<int> & ranks_bev, std::vector<int> & ranks_depth,
-  std::vector<int> & ranks_feat, std::vector<int> & interval_starts,
-  std::vector<int> & interval_lengths)
+  std::vector<int> & ranks_bev, std::vector<int> & ranks_depth, std::vector<int> & ranks_feat,
+  std::vector<int> & interval_starts, std::vector<int> & interval_lengths)
 {
   int num_points = n_img_ * depth_num_ * feat_h_ * feat_w_;
   std::vector<Eigen::Vector3f> frustum(num_points);
@@ -325,7 +323,8 @@ void BEVDet::initViewTransformer(
     }
   }
 
-  valid_feat_num_ = kept.size();;
+  valid_feat_num_ = kept.size();
+  ;
   ranks_depth.resize(valid_feat_num_);
   ranks_feat.resize(valid_feat_num_);
   ranks_bev.resize(valid_feat_num_);
@@ -334,7 +333,7 @@ void BEVDet::initViewTransformer(
   for (int i = 0; i < valid_feat_num_; i++) {
     Eigen::Vector3f & p = frustum[kept[i]];
     ranks_bev[i] = static_cast<int>(p.z()) * xgrid_num_ * ygrid_num_ +
-                        static_cast<int>(p.y()) * xgrid_num_ + static_cast<int>(p.x());
+                   static_cast<int>(p.y()) * xgrid_num_ + static_cast<int>(p.x());
     order[i] = i;
   }
 

From 04aa368f133f65955c60eba4bdaa27fbed5c0cf8 Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Tue, 19 Nov 2024 14:53:24 +0800
Subject: [PATCH 77/85] refactor: migration of open-source tensorrt-bevdet
 source code to external bevdet_vendor library

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 .../autoware_tensorrt_bevdet/CMakeLists.txt   |  94 +--
 perception/autoware_tensorrt_bevdet/README.md |  10 +-
 .../config/bevdet.param.yaml                  |   4 +-
 .../tensorrt_bevdet/alignbev_plugin.hpp       | 119 ---
 .../autoware/tensorrt_bevdet/bevdet.hpp       | 239 ------
 .../autoware/tensorrt_bevdet/bevdet_node.hpp  |  64 +-
 .../tensorrt_bevdet/bevpool_plugin.hpp        | 123 ----
 .../autoware/tensorrt_bevdet/common.hpp       | 148 ----
 .../tensorrt_bevdet/cpu_jpegdecoder.hpp       |  24 -
 .../include/autoware/tensorrt_bevdet/data.hpp |  72 --
 .../tensorrt_bevdet/gatherbev_plugin.hpp      | 120 ---
 .../autoware/tensorrt_bevdet/iou3d_nms.hpp    |  74 --
 .../tensorrt_bevdet/nvjpegdecoder.hpp         | 118 ---
 .../autoware/tensorrt_bevdet/postprocess.hpp  |  72 --
 .../autoware/tensorrt_bevdet/preprocess.hpp   |  21 -
 .../tensorrt_bevdet/preprocess_plugin.hpp     | 122 ----
 .../autoware/tensorrt_bevdet/ros_utils.hpp    |  46 ++
 .../launch/tensorrt_bevdet.launch.xml         |   2 +-
 .../autoware_tensorrt_bevdet/package.xml      |  14 +-
 .../src/alignbev_plugin.cu                    | 352 ---------
 .../autoware_tensorrt_bevdet/src/bevdet.cpp   | 687 ------------------
 .../src/bevdet_node.cpp                       | 169 +----
 .../src/bevpool_plugin.cu                     | 364 ----------
 .../src/cpu_jpegdecoder.cpp                   |  93 ---
 .../autoware_tensorrt_bevdet/src/data.cpp     |  84 ---
 .../src/gatherbev_plugin.cu                   | 308 --------
 .../autoware_tensorrt_bevdet/src/iou3d_nms.cu | 633 ----------------
 .../src/nvjpegdecoder.cpp                     | 299 --------
 .../src/postprocess.cu                        | 212 ------
 .../src/preprocess.cu                         |  37 -
 .../src/preprocess_plugin.cu                  | 347 ---------
 .../src/ros_utils.cpp                         | 117 +++
 32 files changed, 249 insertions(+), 4939 deletions(-)
 delete mode 100644 perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/alignbev_plugin.hpp
 delete mode 100644 perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp
 delete mode 100644 perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevpool_plugin.hpp
 delete mode 100644 perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/common.hpp
 delete mode 100644 perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/cpu_jpegdecoder.hpp
 delete mode 100644 perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/data.hpp
 delete mode 100644 perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/gatherbev_plugin.hpp
 delete mode 100644 perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/iou3d_nms.hpp
 delete mode 100644 perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/nvjpegdecoder.hpp
 delete mode 100644 perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/postprocess.hpp
 delete mode 100644 perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/preprocess.hpp
 delete mode 100644 perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/preprocess_plugin.hpp
 create mode 100644 perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/ros_utils.hpp
 delete mode 100644 perception/autoware_tensorrt_bevdet/src/alignbev_plugin.cu
 delete mode 100644 perception/autoware_tensorrt_bevdet/src/bevdet.cpp
 delete mode 100644 perception/autoware_tensorrt_bevdet/src/bevpool_plugin.cu
 delete mode 100644 perception/autoware_tensorrt_bevdet/src/cpu_jpegdecoder.cpp
 delete mode 100644 perception/autoware_tensorrt_bevdet/src/data.cpp
 delete mode 100644 perception/autoware_tensorrt_bevdet/src/gatherbev_plugin.cu
 delete mode 100644 perception/autoware_tensorrt_bevdet/src/iou3d_nms.cu
 delete mode 100644 perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp
 delete mode 100644 perception/autoware_tensorrt_bevdet/src/postprocess.cu
 delete mode 100644 perception/autoware_tensorrt_bevdet/src/preprocess.cu
 delete mode 100644 perception/autoware_tensorrt_bevdet/src/preprocess_plugin.cu
 create mode 100644 perception/autoware_tensorrt_bevdet/src/ros_utils.cpp

diff --git a/perception/autoware_tensorrt_bevdet/CMakeLists.txt b/perception/autoware_tensorrt_bevdet/CMakeLists.txt
index a383274e5a617..e9e8c36e18bd4 100644
--- a/perception/autoware_tensorrt_bevdet/CMakeLists.txt
+++ b/perception/autoware_tensorrt_bevdet/CMakeLists.txt
@@ -1,102 +1,52 @@
 cmake_minimum_required(VERSION 3.17)
 project(autoware_tensorrt_bevdet)
 
-add_compile_options(-W)
 add_compile_options(-std=c++17)
-set(CMAKE_CXX_FLAGS_RELEASE "-Wno-deprecated-declarations -O2")
-set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -std=c++17 -O0 -Xcompiler -fPIC -g -w -gencode=arch=compute_86,code=sm_86")
-find_package(tensorrt_common)
-if(NOT ${tensorrt_common_FOUND})
-  message(WARNING "The tensorrt_common package is not found. Please check its dependencies.")
+
+find_package(bevdet_vendor)
+if(NOT ${bevdet_vendor_FOUND})
+  message(WARNING "The bevdet_vendor package is not found. Please check its dependencies.")
   return()
 endif()
 
-find_package(ament_cmake REQUIRED)
-find_package(cudnn_cmake_module REQUIRED)
-find_package(tensorrt_cmake_module REQUIRED)
+find_package(autoware_cmake REQUIRED)
+autoware_package()
 find_package(rclcpp REQUIRED)
-find_package(yaml-cpp REQUIRED)
+find_package(rclcpp_components REQUIRED)
 find_package(Eigen3 REQUIRED)
-find_package(OpenCV REQUIRED)
-find_package(PCL REQUIRED)
 find_package(sensor_msgs REQUIRED)
-find_package(pcl_conversions REQUIRED)
 find_package(cv_bridge REQUIRED)
 find_package(tf2_geometry_msgs REQUIRED)
 find_package(autoware_perception_msgs REQUIRED)
 
-find_package(CUDA REQUIRED)
-find_package(CUDAToolkit)
-find_package(CUDNN)
-find_package(TENSORRT)
-if(NOT (CUDAToolkit_FOUND AND CUDNN_FOUND AND TENSORRT_FOUND))
-  message(WARNING "cuda, cudnn, tensorrt libraries are not found")
-  return()
-endif()
+ament_auto_add_library(${PROJECT_NAME}_component SHARED
+    src/bevdet_node.cpp
+    src/ros_utils.cpp
+)
 
 include_directories(
   include
-  SYSTEM
-  ${YAML_CPP_INCLUDE_DIRS}
   ${EIGEN3_INCLUDE_DIRS}
   ${OpenCV_INCLUDE_DIRS}
-  ${PCL_INCLUDE_DIRS}
   ${cv_bridge_INCLUDE_DIRS}
   ${tf2_geometry_msgs_INCLUDE_DIRS}
   ${autoware_perception_msgs_INCLUDE_DIRS}
+  ${bevdet_vendor_INCLUDE_DIRS}
 )
 
-cuda_add_executable(${PROJECT_NAME}_node
-    src/bevdet_node.cpp
-    src/bevdet.cpp
-    src/preprocess.cu
-    src/iou3d_nms.cu
-    src/postprocess.cu
-    src/data.cpp
-    src/cpu_jpegdecoder.cpp
-    src/nvjpegdecoder.cpp
-
-    src/preprocess_plugin.cu
-    src/bevpool_plugin.cu
-    src/alignbev_plugin.cu
-    src/gatherbev_plugin.cu
-)
-
-# Link libraries to executable
-ament_target_dependencies(${PROJECT_NAME}_node
-  "rclcpp"
-  "sensor_msgs"
-  "pcl_conversions"
-  "cv_bridge"
-  "autoware_perception_msgs"
-  "tf2_geometry_msgs"
-)
-
-target_link_libraries(${PROJECT_NAME}_node
-    yaml-cpp
-    libnvinfer.so
-    libnvonnxparser.so
-    libz.so
-    libjpeg.so
+target_link_libraries(${PROJECT_NAME}_component
     rclcpp::rclcpp
-    stdc++fs
-    ${NVINFER}
-    ${TENSORRT_LIBRARIES}
-    ${CUDA_LIBRARIES}
-    ${CUBLAS_LIBRARIES}
-    ${CUDNN_LIBRARY}
+    ${bevdet_vendor_TARGETS}
     ${OpenCV_LIBS}
-    ${PCL_LIBRARIES}
 )
 
-install(TARGETS ${PROJECT_NAME}_node
-  DESTINATION lib/${PROJECT_NAME}
-)
+rclcpp_components_register_node(${PROJECT_NAME}_component
+    PLUGIN "autoware::tensorrt_bevdet::TRTBEVDetNode"
+    EXECUTABLE ${PROJECT_NAME}_node
+  )
 
-install(DIRECTORY
-  launch
-  config
-  DESTINATION share/${PROJECT_NAME}/
+ament_auto_package(
+  INSTALL_TO_SHARE
+    launch
+    config
 )
-
-ament_package()
diff --git a/perception/autoware_tensorrt_bevdet/README.md b/perception/autoware_tensorrt_bevdet/README.md
index ce6b1cc02c2bb..fc508d56699e7 100644
--- a/perception/autoware_tensorrt_bevdet/README.md
+++ b/perception/autoware_tensorrt_bevdet/README.md
@@ -2,11 +2,15 @@
 
 ## Purpose
 
-tensorrt_bevdet is a dynamic 3D bev object detection package based on 6 surround view cameras.
+The core algorithm, named `BEVDet`, it unifies multi-view images into the perspective of BEV for 3D object detection task.
 
 ## Inner-workings / Algorithms
 
-BEVDet is a BEV perception algorithm based on panoramic cameras. It unifies multi-view images into the perspective of BEV for 3D object detection task. In this implementation, BEVDet network to inference with TensorRT.
+### Cite
+<!-- cspell: ignore Junjie Huang, Guan Huang -->
+- Junjie Huang, Guan Huang, "BEVPoolv2: A Cutting-edge Implementation of BEVDet Toward Deployment", [[ref](https://arxiv.org/pdf/2211.17111)]
+- [bevdet_vendor]() package are copied from the [original codes](https://github.com/LCH1238/bevdet-tensorrt-cpp/tree/one) (The TensorRT, C++ implementation by LCH1238) and modified.
+- This package is ported version toward Autoware from [bevdet_vendor](). 
 
 ## Inputs / Outputs
 
@@ -44,7 +48,7 @@ BEVDet is a BEV perception algorithm based on panoramic cameras. It unifies mult
 
 2. play ros2 bag of nuScenes data
 
-   Please refer to open source repository [ros2_dataset_bridge](https://github.com/Owen-Liuyuxuan/ros2_dataset_bridge) to publish the ROS 2 topics.
+   Please refer to open source repository [ros2_dataset_bridge](https://github.com/Owen-Liuyuxuan/ros2_dataset_bridge) to publish the ROS 2 topics for NuScenes dataset.
 
 ## Limitation
 
diff --git a/perception/autoware_tensorrt_bevdet/config/bevdet.param.yaml b/perception/autoware_tensorrt_bevdet/config/bevdet.param.yaml
index e7140328a5e6e..bbaef399687ad 100644
--- a/perception/autoware_tensorrt_bevdet/config/bevdet.param.yaml
+++ b/perception/autoware_tensorrt_bevdet/config/bevdet.param.yaml
@@ -4,9 +4,7 @@
       onnx_path: "$(var model_path)/$(var model_name).onnx"
       engine_path: "$(var model_path)/$(var model_name).engine"
       data_params:
-        N: 6  # camera num
-        H: 900 # image height
-        W: 1600 # image width
+        CAM_NUM: 6
         cams: ["CAM_FRONT_LEFT", "CAM_FRONT", "CAM_FRONT_RIGHT", "CAM_BACK_LEFT", "CAM_BACK", "CAM_BACK_RIGHT"]
       post_process_params:
         # post-process params
diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/alignbev_plugin.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/alignbev_plugin.hpp
deleted file mode 100644
index beeeb8ed656d8..0000000000000
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/alignbev_plugin.hpp
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef AUTOWARE__TENSORRT_BEVDET__ALIGNBEV_PLUGIN_HPP_
-#define AUTOWARE__TENSORRT_BEVDET__ALIGNBEV_PLUGIN_HPP_
-
-#include <NvInfer.h>
-#include <unistd.h>
-
-#include <cmath>
-#include <iostream>
-#include <string>
-#include <vector>
-
-namespace
-{
-static const char * ALIGN_PLUGIN_NAME{"AlignBEV"};
-static const char * ALIGN_PLUGIN_VERSION{"1"};
-}  // namespace
-
-namespace nvinfer1
-{
-class AlignBEVPlugin : public IPluginV2DynamicExt
-{
-private:
-  const std::string name_;
-  std::string namespace_;
-  struct
-  {
-  } m_;
-
-public:
-  AlignBEVPlugin() = delete;
-  explicit AlignBEVPlugin(const std::string & name);
-  AlignBEVPlugin(const std::string & name, const void * buffer, size_t length);
-  ~AlignBEVPlugin();
-
-  // Method inherited from IPluginV2
-  const char * getPluginType() const noexcept override;
-  const char * getPluginVersion() const noexcept override;
-  int32_t getNbOutputs() const noexcept override;
-  int32_t initialize() noexcept override;
-  void terminate() noexcept override;
-  size_t getSerializationSize() const noexcept override;
-  void serialize(void * buffer) const noexcept override;
-  void destroy() noexcept override;
-  void setPluginNamespace(const char * pluginNamespace) noexcept override;
-  const char * getPluginNamespace() const noexcept override;
-
-  // Method inherited from IPluginV2Ext
-  DataType getOutputDataType(
-    int32_t index, DataType const * inputTypes, int32_t nbInputs) const noexcept override;
-  void attachToContext(
-    cudnnContext * contextCudnn, cublasContext * contextCublas,
-    IGpuAllocator * gpuAllocator) noexcept override;
-  void detachFromContext() noexcept override;
-
-  // Method inherited from IPluginV2DynamicExt
-  IPluginV2DynamicExt * clone() const noexcept override;
-  DimsExprs getOutputDimensions(
-    int32_t outputIndex, const DimsExprs * inputs, int32_t nbInputs,
-    IExprBuilder & exprBuilder) noexcept override;
-  bool supportsFormatCombination(
-    int32_t pos, const PluginTensorDesc * inOut, int32_t nbInputs,
-    int32_t nbOutputs) noexcept override;
-  void configurePlugin(
-    const DynamicPluginTensorDesc * in, int32_t nbInputs, const DynamicPluginTensorDesc * out,
-    int32_t nbOutputs) noexcept override;
-  size_t getWorkspaceSize(
-    const PluginTensorDesc * inputs, int32_t nbInputs, const PluginTensorDesc * outputs,
-    int32_t nbOutputs) const noexcept override;
-  int32_t enqueue(
-    const PluginTensorDesc * inputDesc, const PluginTensorDesc * outputDesc,
-    const void * const * inputs, void * const * outputs, void * workspace,
-    cudaStream_t stream) noexcept override;
-
-protected:
-  // To prevent compiler warnings
-  using nvinfer1::IPluginV2::enqueue;
-  using nvinfer1::IPluginV2::getOutputDimensions;
-  using nvinfer1::IPluginV2::getWorkspaceSize;
-  using nvinfer1::IPluginV2Ext::configurePlugin;
-};
-
-class AlignBEVPluginCreator : public IPluginCreator
-{
-private:
-  static PluginFieldCollection fc_;
-  static std::vector<PluginField> attr_;
-  std::string namespace_;
-
-public:
-  AlignBEVPluginCreator();
-  ~AlignBEVPluginCreator();
-  const char * getPluginName() const noexcept override;
-  const char * getPluginVersion() const noexcept override;
-  const PluginFieldCollection * getFieldNames() noexcept override;
-  IPluginV2DynamicExt * createPlugin(
-    const char * name, const PluginFieldCollection * fc) noexcept override;
-  IPluginV2DynamicExt * deserializePlugin(
-    const char * name, const void * serialData, size_t serialLength) noexcept override;
-  void setPluginNamespace(const char * pluginNamespace) noexcept override;
-  const char * getPluginNamespace() const noexcept override;
-};
-
-}  // namespace nvinfer1
-#endif  // AUTOWARE__TENSORRT_BEVDET__ALIGNBEV_PLUGIN_HPP_
diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp
deleted file mode 100644
index 8c6241ba4e647..0000000000000
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet.hpp
+++ /dev/null
@@ -1,239 +0,0 @@
-// Copyright 2024 AutoCore, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#ifndef AUTOWARE__TENSORRT_BEVDET__BEVDET_HPP_
-#define AUTOWARE__TENSORRT_BEVDET__BEVDET_HPP_
-
-#include "NvInfer.h"
-#include "common.hpp"
-#include "data.hpp"
-#include "postprocess.hpp"
-
-#include <Eigen/Core>
-#include <Eigen/Geometry>
-
-#include <NvOnnxParser.h>
-#include <yaml-cpp/yaml.h>
-
-#include <algorithm>
-#include <map>
-#include <memory>
-#include <string>
-#include <vector>
-
-namespace autoware::tensorrt_bevdet
-{
-class AdjFrame
-{
-public:
-  AdjFrame() {}
-  AdjFrame(int n, size_t buf_size)
-  : n_(n), buf_size_(buf_size), scenes_token_(n), ego2global_rot_(n), ego2global_trans_(n)
-  {
-    CHECK_CUDA(cudaMalloc(reinterpret_cast<void **>(&adj_buffer), n * buf_size));
-    CHECK_CUDA(cudaMemset(adj_buffer, 0, n * buf_size));
-    last_ = 0;
-    buffer_num_ = 0;
-    init_ = false;
-
-    for (auto & rot : ego2global_rot_) {
-      rot = Eigen::Quaternion<float>(0.f, 0.f, 0.f, 0.f);
-    }
-    for (auto & trans : ego2global_trans_) {
-      trans = Eigen::Translation3f(0.f, 0.f, 0.f);
-    }
-  }
-  const std::string & lastScenesToken() const { return scenes_token_[last_]; }
-
-  void reset()
-  {
-    last_ = 0;  // origin -1
-    buffer_num_ = 0;
-    init_ = false;
-  }
-
-  void saveFrameBuffer(
-    const void * curr_buffer, const std::string & curr_token,
-    const Eigen::Quaternion<float> & ego2global_rot, const Eigen::Translation3f & ego2global_trans)
-  {
-    int iters = init_ ? 1 : n_;
-    while (iters--) {
-      last_ = (last_ + 1) % n_;
-      CHECK_CUDA(cudaMemcpy(
-        reinterpret_cast<char *>(adj_buffer) + last_ * buf_size_, curr_buffer, buf_size_,
-        cudaMemcpyDeviceToDevice));
-      scenes_token_[last_] = curr_token;
-      ego2global_rot_[last_] = ego2global_rot;
-      ego2global_trans_[last_] = ego2global_trans;
-      buffer_num_ = std::min(buffer_num_ + 1, n_);
-    }
-    init_ = true;
-  }
-  int havingBuffer(int idx) { return static_cast<int>(idx < buffer_num_); }
-
-  const void * getFrameBuffer(int idx)
-  {
-    idx = (-idx + last_ + n_) % n_;
-    return reinterpret_cast<char *>(adj_buffer) + idx * buf_size_;
-  }
-  void getEgo2Global(
-    int idx, Eigen::Quaternion<float> & adj_ego2global_rot,
-    Eigen::Translation3f & adj_ego2global_trans)
-  {
-    idx = (-idx + last_ + n_) % n_;
-    adj_ego2global_rot = ego2global_rot_[idx];
-    adj_ego2global_trans = ego2global_trans_[idx];
-  }
-
-  ~AdjFrame() { CHECK_CUDA(cudaFree(adj_buffer)); }
-
-private:
-  int n_;
-  size_t buf_size_;
-
-  int last_;
-  int buffer_num_;
-  bool init_;
-
-  std::vector<std::string> scenes_token_;
-  std::vector<Eigen::Quaternion<float>> ego2global_rot_;
-  std::vector<Eigen::Translation3f> ego2global_trans_;
-
-  void * adj_buffer;
-};
-
-class BEVDet
-{
-public:
-  BEVDet() {}
-  BEVDet(
-    const std::string & config_file, int n_img, std::vector<Eigen::Matrix3f> cams_intrin,
-    std::vector<Eigen::Quaternion<float>> cams2ego_rot,
-    std::vector<Eigen::Translation3f> cams2ego_trans, const std::string & onnx_file,
-    const std::string & engine_file);
-
-  int doInfer(
-    const camsData & cam_data, std::vector<Box> & out_detections, float & cost_time, int idx = -1);
-  ~BEVDet();
-
-protected:
-  void initParams(const std::string & config_file);
-
-  void initViewTransformer(
-    std::vector<int> & ranks_bev, std::vector<int> & ranks_depth, std::vector<int> & ranks_feat,
-    std::vector<int> & interval_starts, std::vector<int> & interval_lengths);
-  void exportEngine(const std::string & onnxFile, const std::string & trtFile);
-  int initEngine(const std::string & engine_file);
-
-  int deserializeTRTEngine(const std::string & engine_file, nvinfer1::ICudaEngine ** engine_ptr);
-
-  void mallocDeviceMemory();
-
-  void initCamParams(
-    const std::vector<Eigen::Quaternion<float>> & curr_cams2ego_rot,
-    const std::vector<Eigen::Translation3f> & curr_cams2ego_trans,
-    const std::vector<Eigen::Matrix3f> & cams_intrin);
-
-  void getAdjBEVFeature(
-    const std::string & curr_scene_token, const Eigen::Quaternion<float> & ego2global_rot,
-    const Eigen::Translation3f & ego2global_trans);
-
-  void getCurr2AdjTransform(
-    const Eigen::Quaternion<float> & curr_ego2global_rot,
-    const Eigen::Quaternion<float> & adj_ego2global_rot,
-    const Eigen::Translation3f & curr_ego2global_trans,
-    const Eigen::Translation3f & adj_ego2global_trans, float * transform_dev);
-
-private:
-  int n_img_;
-
-  int src_img_h_;
-  int src_img_w_;
-  int input_img_h_;
-  int input_img_w_;
-  int crop_h_;
-  int crop_w_;
-  float resize_radio_;
-  int down_sample_;
-  int feat_h_;
-  int feat_w_;
-  int bev_h_;
-  int bev_w_;
-  int bevpool_channel_;
-
-  float depth_start_;
-  float depth_end_;
-  float depth_step_;
-  int depth_num_;
-
-  float x_start_;
-  float x_end_;
-  float x_step_;
-  int xgrid_num_;
-
-  float y_start_;
-  float y_end_;
-  float y_step_;
-  int ygrid_num_;
-
-  float z_start_;
-  float z_end_;
-  float z_step_;
-  int zgrid_num_;
-
-  std::vector<float> mean_;
-  std::vector<float> std_;
-
-  bool use_depth_;
-  bool use_adj_;
-  int adj_num_;
-
-  int class_num_;
-  float score_thresh_;
-  float nms_overlap_thresh_;
-  int nms_pre_maxnum_;
-  int nms_post_maxnum_;
-  std::vector<float> nms_rescale_factor_;
-  std::vector<int> class_num_pre_task_;
-  std::map<std::string, int> out_num_task_head_;
-
-  std::vector<Eigen::Matrix3f> cams_intrin_;
-  std::vector<Eigen::Quaternion<float>> cams2ego_rot_;
-  std::vector<Eigen::Translation3f> cams2ego_trans_;
-
-  Eigen::Matrix3f post_rot_;
-  Eigen::Translation3f post_trans_;
-
-  std::vector<size_t> trt_buffer_sizes_;
-  void ** trt_buffer_dev_;
-  std::vector<float> cam_params_host_;
-  void ** post_buffer_;
-
-  std::map<std::string, int> buffer_map_;
-
-  int valid_feat_num_;
-  int unique_bev_num_;
-
-  int transform_size_;
-  int cam_params_size_;
-
-  Logger g_logger_;
-
-  nvinfer1::ICudaEngine * trt_engine_;
-  nvinfer1::IExecutionContext * trt_context_;
-
-  std::unique_ptr<PostprocessGPU> postprocess_ptr_;
-  std::unique_ptr<AdjFrame> adj_frame_ptr_;
-};
-}  // namespace autoware::tensorrt_bevdet
-#endif  // AUTOWARE__TENSORRT_BEVDET__BEVDET_HPP_
diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
index 1a493bb2f8889..25ef4bd730e95 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
@@ -16,8 +16,9 @@
 #ifndef AUTOWARE__TENSORRT_BEVDET__BEVDET_NODE_HPP_
 #define AUTOWARE__TENSORRT_BEVDET__BEVDET_NODE_HPP_
 
-#include "bevdet.hpp"
-#include "cpu_jpegdecoder.hpp"
+#include "autoware/tensorrt_bevdet/ros_utils.hpp"
+#include <cpu_jpegdecoder.h>
+
 #include "tf2_ros/buffer.h"
 #include "tf2_ros/transform_listener.h"
 
@@ -25,27 +26,12 @@
 #include <Eigen/Geometry>
 #include <opencv2/opencv.hpp>
 
-#include <autoware_perception_msgs/msg/detected_object_kinematics.hpp>
-#include <autoware_perception_msgs/msg/detected_objects.hpp>
-#include <autoware_perception_msgs/msg/object_classification.hpp>
-#include <autoware_perception_msgs/msg/shape.hpp>
-#include <geometry_msgs/msg/point32.hpp>
-#include <geometry_msgs/msg/quaternion.hpp>
-#include <geometry_msgs/msg/vector3.hpp>
-#include <sensor_msgs/msg/camera_info.hpp>
 #include <sensor_msgs/msg/image.hpp>
-#include <sensor_msgs/msg/point_cloud2.hpp>
-#include <tf2_geometry_msgs/tf2_geometry_msgs.hpp>
 
 #include <cv_bridge/cv_bridge.h>
 #include <message_filters/subscriber.h>
 #include <message_filters/sync_policies/approximate_time.h>
 #include <message_filters/synchronizer.h>
-#include <pcl/common/transforms.h>
-#include <pcl/io/pcd_io.h>
-#include <pcl/point_types.h>
-#include <pcl_conversions/pcl_conversions.h>  // msg2pcl
-#include <yaml-cpp/yaml.h>
 
 #include <cassert>
 #include <chrono>
@@ -58,21 +44,10 @@
 #include <thread>
 #include <vector>
 
-uint8_t getSemanticType(const std::string & class_name);
-
-void box3DToDetectedObjects(
-  const std::vector<Box> & boxes, autoware_perception_msgs::msg::DetectedObjects & objects,
-  const std::vector<std::string> & class_names, float score_thre, const bool has_twist);
-
-// Get the rotation and translation from a geometry_msgs::msg::TransformStamped
-void getTransform(
-  const geometry_msgs::msg::TransformStamped & transform, Eigen::Quaternion<float> & rot,
-  Eigen::Translation3f & translation);
-
-// Get the camera intrinsics from a sensor_msgs::msg::CameraInfo
-void getCameraIntrinsics(
-  const sensor_msgs::msg::CameraInfo::SharedPtr msg, Eigen::Matrix3f & intrinsics);
-
+namespace autoware
+{
+namespace tensorrt_bevdet
+{
 class TRTBEVDetNode : public rclcpp::Node
 {
   /**
@@ -94,11 +69,11 @@ class TRTBEVDetNode : public rclcpp::Node
   std::vector<std::string> class_names_;  ///< Names of the object classes
 
   camsData sampleData_;  ///< Sample data for camera parameters
-  std::shared_ptr<autoware::tensorrt_bevdet::BEVDet>
-    bevdet_;  ///< Object for performing object detection
+  std::shared_ptr<BEVDet> bevdet_;  ///< Object for performing object detection
 
   uchar * imgs_dev_ = nullptr;  ///< Device pointer for storing the images
   float score_thre_;            ///< Score threshold for object detection
+  bool has_twist_ = true;  /// wether set twist for objects
 
   rclcpp::Publisher<autoware_perception_msgs::msg::DetectedObjects>::SharedPtr
     pub_boxes_;  ///< Publisher for publishing the detected objects
@@ -116,10 +91,9 @@ class TRTBEVDetNode : public rclcpp::Node
     sub_bl_caminfo_;  ///< Subscriber for back-left camera info
   rclcpp::Subscription<sensor_msgs::msg::CameraInfo>::SharedPtr
     sub_br_caminfo_;  ///< Subscriber for back-right camera info
-  std::vector<bool>
-    caminfo_received_;  ///< Flag indicating if camera info has been received for each camera
-  bool camera_info_received_flag_ =
-    false;                    ///< Flag indicating if camera info has been received for all cameras
+
+  std::vector<bool> caminfo_received_;  ///< Flag indicating if camera info has been received for each camera
+  bool camera_info_received_flag_ = false;  ///< Flag indicating if camera info has been received for all cameras
   bool initialized_ = false;  /// Flag indicating if img_w_ and img_h_ has been initialized
 
   // tf listener
@@ -128,11 +102,11 @@ class TRTBEVDetNode : public rclcpp::Node
   std::unique_ptr<tf2_ros::Buffer> tf_buffer_;  ///< Buffer for storing TF transforms
 
   // Camera parameters;
-  std::vector<Eigen::Matrix3f> cams_intrin;  ///< Intrinsic camera parameters for each camera
+  std::vector<Eigen::Matrix3f> cams_intrin_;  ///< Intrinsic camera parameters for each camera
   std::vector<Eigen::Quaternion<float>>
-    cams2ego_rot;  ///< Rotation from camera frame to ego frame for each camera
+    cams2ego_rot_;  ///< Rotation from camera frame to ego frame for each camera
   std::vector<Eigen::Translation3f>
-    cams2ego_trans;  ///< Translation from camera frame to ego frame for each camera
+    cams2ego_trans_;  ///< Translation from camera frame to ego frame for each camera
 
   // Subscribers of images for each camera, synchronized
   message_filters::Subscriber<sensor_msgs::msg::Image>
@@ -182,10 +156,9 @@ class TRTBEVDetNode : public rclcpp::Node
 public:
   /**
    * @brief Constructor for TRTBEVDetNode.
-   * @param node_name The name of the node.
    * @param options The options for the node.
    */
-  TRTBEVDetNode(const std::string & node_name, const rclcpp::NodeOptions & options);
+  TRTBEVDetNode(const rclcpp::NodeOptions & options);
 
   /**
    * @brief Destructor for TRTBEVDetNode.
@@ -215,7 +188,8 @@ class TRTBEVDetNode : public rclcpp::Node
    * @param idx The index of the camera.
    * @param msg The camera info message.
    */
-  void camera_info_callback(int idx, const sensor_msgs::msg::CameraInfo::SharedPtr msg);
+  void cameraInfoCallback(int idx, const sensor_msgs::msg::CameraInfo::SharedPtr msg);
 };
-
+}  // namespace tensorrt_bevdet
+}  // namespace autoware
 #endif  // AUTOWARE__TENSORRT_BEVDET__BEVDET_NODE_HPP_
diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevpool_plugin.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevpool_plugin.hpp
deleted file mode 100644
index 0a1f6e114589a..0000000000000
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevpool_plugin.hpp
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef AUTOWARE__TENSORRT_BEVDET__BEVPOOL_PLUGIN_HPP_
-#define AUTOWARE__TENSORRT_BEVDET__BEVPOOL_PLUGIN_HPP_
-
-#include <NvInfer.h>
-#include <unistd.h>
-
-#include <cmath>
-#include <iostream>
-#include <map>
-#include <string>
-#include <vector>
-
-namespace
-{
-static const char * BEVPOOL_PLUGIN_NAME{"BEVPool"};
-static const char * BEVPOOL_PLUGIN_VERSION{"1"};
-}  // namespace
-
-namespace nvinfer1
-{
-class BEVPoolPlugin : public IPluginV2DynamicExt
-{
-private:
-  const std::string name_;
-  std::string namespace_;
-  struct
-  {
-    int bev_h;
-    int bev_w;
-    int n;
-  } m_;
-
-public:
-  BEVPoolPlugin() = delete;
-  BEVPoolPlugin(const std::string & name, int bev_h, int bev_w, int n);
-  BEVPoolPlugin(const std::string & name, const void * buffer, size_t length);
-  ~BEVPoolPlugin();
-
-  // Method inherited from IPluginV2
-  const char * getPluginType() const noexcept override;
-  const char * getPluginVersion() const noexcept override;
-  int32_t getNbOutputs() const noexcept override;
-  int32_t initialize() noexcept override;
-  void terminate() noexcept override;
-  size_t getSerializationSize() const noexcept override;
-  void serialize(void * buffer) const noexcept override;
-  void destroy() noexcept override;
-  void setPluginNamespace(const char * pluginNamespace) noexcept override;
-  const char * getPluginNamespace() const noexcept override;
-
-  // Method inherited from IPluginV2Ext
-  DataType getOutputDataType(
-    int32_t index, DataType const * inputTypes, int32_t nbInputs) const noexcept override;
-  void attachToContext(
-    cudnnContext * contextCudnn, cublasContext * contextCublas,
-    IGpuAllocator * gpuAllocator) noexcept override;
-  void detachFromContext() noexcept override;
-
-  // Method inherited from IPluginV2DynamicExt
-  IPluginV2DynamicExt * clone() const noexcept override;
-  DimsExprs getOutputDimensions(
-    int32_t outputIndex, const DimsExprs * inputs, int32_t nbInputs,
-    IExprBuilder & exprBuilder) noexcept override;
-  bool supportsFormatCombination(
-    int32_t pos, const PluginTensorDesc * inOut, int32_t nbInputs,
-    int32_t nbOutputs) noexcept override;
-  void configurePlugin(
-    const DynamicPluginTensorDesc * in, int32_t nbInputs, const DynamicPluginTensorDesc * out,
-    int32_t nbOutputs) noexcept override;
-  size_t getWorkspaceSize(
-    const PluginTensorDesc * inputs, int32_t nbInputs, const PluginTensorDesc * outputs,
-    int32_t nbOutputs) const noexcept override;
-  int32_t enqueue(
-    const PluginTensorDesc * inputDesc, const PluginTensorDesc * outputDesc,
-    const void * const * inputs, void * const * outputs, void * workspace,
-    cudaStream_t stream) noexcept override;
-
-protected:
-  // To prevent compiler warnings
-  using nvinfer1::IPluginV2::enqueue;
-  using nvinfer1::IPluginV2::getOutputDimensions;
-  using nvinfer1::IPluginV2::getWorkspaceSize;
-  using nvinfer1::IPluginV2Ext::configurePlugin;
-};
-
-class BEVPoolPluginCreator : public IPluginCreator
-{
-private:
-  static PluginFieldCollection fc_;
-  static std::vector<PluginField> attr_;
-  std::string namespace_;
-
-public:
-  BEVPoolPluginCreator();
-  ~BEVPoolPluginCreator();
-  const char * getPluginName() const noexcept override;
-  const char * getPluginVersion() const noexcept override;
-  const PluginFieldCollection * getFieldNames() noexcept override;
-  IPluginV2DynamicExt * createPlugin(
-    const char * name, const PluginFieldCollection * fc) noexcept override;
-  IPluginV2DynamicExt * deserializePlugin(
-    const char * name, const void * serialData, size_t serialLength) noexcept override;
-  void setPluginNamespace(const char * pluginNamespace) noexcept override;
-  const char * getPluginNamespace() const noexcept override;
-};
-
-}  // namespace nvinfer1
-#endif  // AUTOWARE__TENSORRT_BEVDET__BEVPOOL_PLUGIN_HPP_
diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/common.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/common.hpp
deleted file mode 100644
index a1a745d9e604f..0000000000000
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/common.hpp
+++ /dev/null
@@ -1,148 +0,0 @@
-// Copyright 2024 AutoCore, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#ifndef AUTOWARE__TENSORRT_BEVDET__COMMON_HPP_
-#define AUTOWARE__TENSORRT_BEVDET__COMMON_HPP_
-
-#include "rclcpp/logger.hpp"
-#include "rclcpp/rclcpp.hpp"
-
-#include <NvInfer.h>
-#include <assert.h>
-#include <cuda_runtime.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <thrust/sort.h>
-
-#include <iostream>
-#include <string>
-
-typedef unsigned char uchar;
-
-#define NUM_THREADS 512
-
-#define CHECK_CUDA(ans)                   \
-  {                                       \
-    GPUAssert((ans), __FILE__, __LINE__); \
-  }
-
-#define DIVUP(m, n) (((m) + (n) - 1) / (n))
-
-inline void GPUAssert(cudaError_t code, const char * file, int line, bool abort = true)
-{
-  if (code != cudaSuccess) {
-    RCLCPP_ERROR(
-      rclcpp::get_logger("GPUAssert"), "GPUassert: %s %s %d", cudaGetErrorString(code), file, line);
-    if (abort) exit(code);
-  }
-};
-
-#define CUDA_1D_KERNEL_LOOP(i, n) \
-  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); i += blockDim.x * gridDim.x)
-
-const int MAXTENSORDIMS = 6;
-struct TensorDesc
-{
-  int shape[MAXTENSORDIMS];
-  int stride[MAXTENSORDIMS];
-  int dim;
-};
-
-inline int GET_BLOCKS(const int N)
-{
-  int optimal_block_num = DIVUP(N, NUM_THREADS);
-  int max_block_num = 4096;
-  return optimal_block_num < max_block_num ? optimal_block_num : max_block_num;
-}
-
-__inline__ std::string dataTypeToString(nvinfer1::DataType dataType)
-{
-  switch (dataType) {
-    case nvinfer1::DataType::kFLOAT:
-      return std::string("FP32 ");
-    case nvinfer1::DataType::kHALF:
-      return std::string("FP16 ");
-    case nvinfer1::DataType::kINT8:
-      return std::string("INT8 ");
-    case nvinfer1::DataType::kINT32:
-      return std::string("INT32");
-    case nvinfer1::DataType::kBOOL:
-      return std::string("BOOL ");
-    default:
-      return std::string("Unknown");
-  }
-}
-
-__inline__ size_t dataTypeToSize(nvinfer1::DataType dataType)
-{
-  switch (dataType) {
-    case nvinfer1::DataType::kFLOAT:
-      return 4;
-    case nvinfer1::DataType::kHALF:
-      return 2;
-    case nvinfer1::DataType::kINT8:
-      return 1;
-    case nvinfer1::DataType::kINT32:
-      return 4;
-    case nvinfer1::DataType::kBOOL:
-      return 1;
-    default:
-      return 4;
-  }
-}
-
-__inline__ std::string shapeToString(nvinfer1::Dims32 dim)
-{
-  std::string output("(");
-  if (dim.nbDims == 0) {
-    return output + std::string(")");
-  }
-  for (int i = 0; i < dim.nbDims - 1; ++i) {
-    output += std::to_string(dim.d[i]) + std::string(", ");
-  }
-  output += std::to_string(dim.d[dim.nbDims - 1]) + std::string(")");
-  return output;
-}
-
-class Logger : public nvinfer1::ILogger
-{
-public:
-  explicit Logger(Severity severity = Severity::kWARNING) : reportable_severity(severity) {}
-
-  void log(Severity severity, const char * msg) noexcept override
-  {
-    // suppress messages with severity enum value greater than the reportable
-    if (severity > reportable_severity) return;
-    switch (severity) {
-      case Severity::kINTERNAL_ERROR:
-        RCLCPP_ERROR(rclcpp::get_logger("autoware_tensorrt_bevdet"), "INTERNAL_ERROR: %s", msg);
-        break;
-      case Severity::kERROR:
-        RCLCPP_ERROR(rclcpp::get_logger("autoware_tensorrt_bevdet"), "ERROR: %s", msg);
-        break;
-      case Severity::kWARNING:
-        RCLCPP_WARN(rclcpp::get_logger("autoware_tensorrt_bevdet"), "WARNING: %s", msg);
-        break;
-      case Severity::kINFO:
-        RCLCPP_INFO(rclcpp::get_logger("autoware_tensorrt_bevdet"), "INFO: %s", msg);
-        break;
-      default:
-        RCLCPP_INFO(rclcpp::get_logger("autoware_tensorrt_bevdet"), "UNKNOWN: %s", msg);
-        break;
-    }
-  }
-
-  Severity reportable_severity;
-};
-
-#endif  // AUTOWARE__TENSORRT_BEVDET__COMMON_HPP_
diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/cpu_jpegdecoder.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/cpu_jpegdecoder.hpp
deleted file mode 100644
index 94a301976fe6a..0000000000000
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/cpu_jpegdecoder.hpp
+++ /dev/null
@@ -1,24 +0,0 @@
-// Copyright 2024 AutoCore, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#ifndef AUTOWARE__TENSORRT_BEVDET__CPU_JPEGDECODER_HPP_
-#define AUTOWARE__TENSORRT_BEVDET__CPU_JPEGDECODER_HPP_
-
-#include "common.hpp"
-
-#include <vector>
-
-int decode_cpu(
-  const std::vector<std::vector<char>> & files_data, uchar * out_imgs, size_t width, size_t height);
-
-#endif  // AUTOWARE__TENSORRT_BEVDET__CPU_JPEGDECODER_HPP_
diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/data.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/data.hpp
deleted file mode 100644
index 153795b9d0f16..0000000000000
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/data.hpp
+++ /dev/null
@@ -1,72 +0,0 @@
-// Copyright 2024 AutoCore, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// cspell:ignore BEVDET, intrin, Quater, BEVDET
-#ifndef AUTOWARE__TENSORRT_BEVDET__DATA_HPP_
-#define AUTOWARE__TENSORRT_BEVDET__DATA_HPP_
-
-#include "common.hpp"
-#include "nvjpegdecoder.hpp"
-
-#include <Eigen/Core>
-#include <Eigen/Geometry>
-
-#include <yaml-cpp/yaml.h>
-
-#include <string>
-#include <vector>
-
-struct camParams
-{
-  camParams() = default;
-  camParams(const YAML::Node & config, int n, std::vector<std::string> & cams_name);
-  camParams(
-    const std::vector<Eigen::Matrix3f> & _cams_intrin,
-    const std::vector<Eigen::Quaternion<float>> & _cams2ego_rot,
-    const std::vector<Eigen::Translation3f> & _cams2ego_trans)
-  : cams_intrin(_cams_intrin), cams2ego_rot(_cams2ego_rot), cams2ego_trans(_cams2ego_trans)
-  {
-  }
-
-  int N_img;
-
-  Eigen::Quaternion<float> ego2global_rot;
-  Eigen::Translation3f ego2global_trans;
-
-  Eigen::Quaternion<float> lidar2ego_rot;
-  Eigen::Translation3f lidar2ego_trans;
-  //
-  std::vector<Eigen::Matrix3f> cams_intrin;
-  std::vector<Eigen::Quaternion<float>> cams2ego_rot;
-  std::vector<Eigen::Translation3f> cams2ego_trans;
-  //
-  std::vector<std::string> imgs_file;
-
-  std::int64_t timestamp;
-  std::string scene_token;
-};
-
-struct camsData
-{
-  camsData() = default;
-  explicit camsData(const camParams & _param) : param(_param), imgs_dev(nullptr) {}
-  camParams param;
-  uchar * imgs_dev;
-};
-
-Eigen::Translation3f fromYamlTrans(YAML::Node x);
-Eigen::Quaternion<float> fromYamlQuater(YAML::Node x);
-Eigen::Matrix3f fromYamlMatrix3f(YAML::Node x);
-
-#endif  // AUTOWARE__TENSORRT_BEVDET__DATA_HPP_
diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/gatherbev_plugin.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/gatherbev_plugin.hpp
deleted file mode 100644
index 3d91763ed88a1..0000000000000
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/gatherbev_plugin.hpp
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef AUTOWARE__TENSORRT_BEVDET__GATHERBEV_PLUGIN_HPP_
-#define AUTOWARE__TENSORRT_BEVDET__GATHERBEV_PLUGIN_HPP_
-
-#include <NvInfer.h>
-#include <unistd.h>
-
-#include <cmath>
-#include <iostream>
-#include <map>
-#include <string>
-#include <vector>
-
-namespace
-{
-static const char * GATHERBEV_PLUGIN_NAME{"GatherBEV"};
-static const char * GATHERBEV_PLUGIN_VERSION{"1"};
-}  // namespace
-
-namespace nvinfer1
-{
-class GatherBEVPlugin : public IPluginV2DynamicExt
-{
-private:
-  const std::string name_;
-  std::string namespace_;
-  struct
-  {
-  } m_;
-
-public:
-  GatherBEVPlugin() = delete;
-  explicit GatherBEVPlugin(const std::string & name);
-  GatherBEVPlugin(const std::string & name, const void * buffer, size_t length);
-  ~GatherBEVPlugin();
-
-  // Method inherited from IPluginV2
-  const char * getPluginType() const noexcept override;
-  const char * getPluginVersion() const noexcept override;
-  int32_t getNbOutputs() const noexcept override;
-  int32_t initialize() noexcept override;
-  void terminate() noexcept override;
-  size_t getSerializationSize() const noexcept override;
-  void serialize(void * buffer) const noexcept override;
-  void destroy() noexcept override;
-  void setPluginNamespace(const char * pluginNamespace) noexcept override;
-  const char * getPluginNamespace() const noexcept override;
-
-  // Method inherited from IPluginV2Ext
-  DataType getOutputDataType(
-    int32_t index, DataType const * inputTypes, int32_t nbInputs) const noexcept override;
-  void attachToContext(
-    cudnnContext * contextCudnn, cublasContext * contextCublas,
-    IGpuAllocator * gpuAllocator) noexcept override;
-  void detachFromContext() noexcept override;
-
-  // Method inherited from IPluginV2DynamicExt
-  IPluginV2DynamicExt * clone() const noexcept override;
-  DimsExprs getOutputDimensions(
-    int32_t outputIndex, const DimsExprs * inputs, int32_t nbInputs,
-    IExprBuilder & exprBuilder) noexcept override;
-  bool supportsFormatCombination(
-    int32_t pos, const PluginTensorDesc * inOut, int32_t nbInputs,
-    int32_t nbOutputs) noexcept override;
-  void configurePlugin(
-    const DynamicPluginTensorDesc * in, int32_t nbInputs, const DynamicPluginTensorDesc * out,
-    int32_t nbOutputs) noexcept override;
-  size_t getWorkspaceSize(
-    const PluginTensorDesc * inputs, int32_t nbInputs, const PluginTensorDesc * outputs,
-    int32_t nbOutputs) const noexcept override;
-  int32_t enqueue(
-    const PluginTensorDesc * inputDesc, const PluginTensorDesc * outputDesc,
-    const void * const * inputs, void * const * outputs, void * workspace,
-    cudaStream_t stream) noexcept override;
-
-protected:
-  // To prevent compiler warnings
-  using nvinfer1::IPluginV2::enqueue;
-  using nvinfer1::IPluginV2::getOutputDimensions;
-  using nvinfer1::IPluginV2::getWorkspaceSize;
-  using nvinfer1::IPluginV2Ext::configurePlugin;
-};
-
-class GatherBEVPluginCreator : public IPluginCreator
-{
-private:
-  static PluginFieldCollection fc_;
-  static std::vector<PluginField> attr_;
-  std::string namespace_;
-
-public:
-  GatherBEVPluginCreator();
-  ~GatherBEVPluginCreator();
-  const char * getPluginName() const noexcept override;
-  const char * getPluginVersion() const noexcept override;
-  const PluginFieldCollection * getFieldNames() noexcept override;
-  IPluginV2DynamicExt * createPlugin(
-    const char * name, const PluginFieldCollection * fc) noexcept override;
-  IPluginV2DynamicExt * deserializePlugin(
-    const char * name, const void * serialData, size_t serialLength) noexcept override;
-  void setPluginNamespace(const char * pluginNamespace) noexcept override;
-  const char * getPluginNamespace() const noexcept override;
-};
-
-}  // namespace nvinfer1
-#endif  // AUTOWARE__TENSORRT_BEVDET__GATHERBEV_PLUGIN_HPP_
diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/iou3d_nms.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/iou3d_nms.hpp
deleted file mode 100644
index 609b36717feab..0000000000000
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/iou3d_nms.hpp
+++ /dev/null
@@ -1,74 +0,0 @@
-// Copyright 2024 AutoCore, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-/*
-3D IoU Calculation and Rotated NMS(modified from 2D NMS written by others)
-Written by Shaoshuai Shi
-All Rights Reserved 2019-2022.
-*/
-#pragma once
-#ifndef AUTOWARE__TENSORRT_BEVDET__IOU3D_NMS_HPP_
-#define AUTOWARE__TENSORRT_BEVDET__IOU3D_NMS_HPP_
-
-#include "common.hpp"
-
-#include <stdio.h>
-#include <thrust/count.h>
-#include <thrust/device_vector.h>
-#include <thrust/gather.h>
-#include <thrust/host_vector.h>
-#include <thrust/sequence.h>
-#include <thrust/sort.h>
-#include <thrust/transform.h>
-
-#include <iostream>
-#include <vector>
-
-const int THREADS_PER_BLOCK = 16;
-const int THREADS_PER_BLOCK_NMS = sizeof(std::int64_t) * 8;
-const float EPS = 1e-8;
-
-class Iou3dNmsCuda
-{
-public:
-  Iou3dNmsCuda(const int head_x_size, const int head_y_size, const float nms_overlap_thresh);
-  ~Iou3dNmsCuda() = default;
-
-  int DoIou3dNms(
-    const int box_num_pre, const float * res_box, const int * res_sorted_indices,
-    std::int64_t * host_keep_data);
-
-private:
-  const int head_x_size_;
-  const int head_y_size_;
-  const float nms_overlap_thresh_;
-};
-
-static const int kBoxBlockSize = 9;
-
-struct Box
-{
-  float x;
-  float y;
-  float z;
-  float l;
-  float w;
-  float h;
-  float r;
-  float vx = 0.0f;  // optional
-  float vy = 0.0f;  // optional
-  float score;
-  int label;
-  bool is_drop;  // for nms
-};
-#endif  // AUTOWARE__TENSORRT_BEVDET__IOU3D_NMS_HPP_
diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/nvjpegdecoder.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/nvjpegdecoder.hpp
deleted file mode 100644
index dad94eb9c4af2..0000000000000
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/nvjpegdecoder.hpp
+++ /dev/null
@@ -1,118 +0,0 @@
-// Copyright 2024 AutoCore, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#ifndef AUTOWARE__TENSORRT_BEVDET__NVJPEGDECODER_HPP_
-#define AUTOWARE__TENSORRT_BEVDET__NVJPEGDECODER_HPP_
-
-#ifdef __HAVE_NVJPEG__
-#include "common.hpp"
-
-#include <dirent.h>
-#include <nvjpeg.h>
-#include <string.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-
-#include <algorithm>
-#include <fstream>
-#include <iostream>
-#include <sstream>
-#include <string>
-#include <thread>
-#include <vector>
-
-#define DECODE_RGB 3
-#define DECODE_BGR 4
-#define DECODE_RGBI 5
-#define DECODE_BGRI 6
-#define DECODE_UNCHANGED 0
-
-#define CHECK_NVJPEG(call)                    \
-  {                                           \
-    NvJpegAssert((call), __FILE__, __LINE__); \
-  }
-
-inline void NvJpegAssert(nvjpegStatus_t code, const char * file, int line)
-{
-  if (code != NVJPEG_STATUS_SUCCESS) {
-    RCLCPP_ERROR(
-      rclcpp::get_logger("autoware_tensorrt_bevdet"), "NVJPEG failure: '#%d' at %s:%d", code, file,
-      line);
-    exit(1);
-  }
-}
-
-struct decode_params_t
-{
-  decode_params_t() {}
-
-  cudaStream_t stream;
-
-  // used with decoupled API
-  nvjpegBufferPinned_t pinned_buffers[2];  // 2 buffers for pipelining
-  nvjpegBufferDevice_t device_buffer;
-  nvjpegJpegStream_t jpeg_streams[2];  //  2 streams for pipelining
-
-  nvjpegDecodeParams_t nvjpeg_decode_params;
-  nvjpegJpegDecoder_t nvjpeg_decoder;
-  nvjpegJpegState_t nvjpeg_decoupled_state;
-};
-
-struct share_params
-{
-  share_params() { fmt = NVJPEG_OUTPUT_UNCHANGED; }
-  explicit share_params(size_t _fmt)
-  {
-    if (
-      _fmt == DECODE_RGB || _fmt == DECODE_BGR || _fmt == DECODE_RGBI || _fmt == DECODE_BGRI ||
-      _fmt == DECODE_UNCHANGED) {
-      fmt = (nvjpegOutputFormat_t)_fmt;
-    } else {
-      RCLCPP_WARN(
-        rclcpp::get_logger("autoware_tensorrt_bevdet"), "Unknown format! Auto switch BGR!");
-      fmt = NVJPEG_OUTPUT_BGR;
-    }
-  }
-
-  nvjpegOutputFormat_t fmt;
-  nvjpegJpegState_t nvjpeg_state;
-  nvjpegHandle_t nvjpeg_handle;
-
-  bool hw_decode_available;
-};
-
-class nvjpegDecoder
-{
-public:
-  nvjpegDecoder() {}
-  nvjpegDecoder(size_t n, size_t _fmt);
-
-  int decode(const std::vector<std::vector<char>> & imgs_data, uchar * out_imgs);
-  int init();
-  ~nvjpegDecoder();
-
-private:
-  size_t N_img;
-  std::vector<nvjpegImage_t> iout;
-  std::vector<nvjpegImage_t> isz;
-
-  std::vector<int> widths;
-  std::vector<int> heights;
-
-  share_params share_param;
-  std::vector<decode_params_t> params;
-};
-
-#endif
-
-#endif  // AUTOWARE__TENSORRT_BEVDET__NVJPEGDECODER_HPP_
diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/postprocess.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/postprocess.hpp
deleted file mode 100644
index 35a2fe0d44829..0000000000000
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/postprocess.hpp
+++ /dev/null
@@ -1,72 +0,0 @@
-// Copyright 2024 AutoCore, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#ifndef AUTOWARE__TENSORRT_BEVDET__POSTPROCESS_HPP_
-#define AUTOWARE__TENSORRT_BEVDET__POSTPROCESS_HPP_
-
-#include "common.hpp"
-#include "iou3d_nms.hpp"
-
-#include <memory>
-#include <vector>
-
-class PostprocessGPU
-{
-public:
-  PostprocessGPU() {}
-  PostprocessGPU(
-    const int _class_num, const float _score_thresh, const float _nms_thresh,
-    const int _nms_pre_maxnum, const int _nms_post_maxnum, const int _down_sample,
-    const int _output_h, const int _output_w, const float _x_step, const float _y_step,
-    const float _x_start, const float _y_start, const std::vector<int> & _class_num_pre_task,
-    const std::vector<float> & _nms_rescale_factor);
-
-  void DoPostprocess(void ** const bev_buffer, std::vector<Box> & out_detections);
-  ~PostprocessGPU();
-
-private:
-  int class_num;
-  float score_thresh;
-  float nms_thresh;
-  int nms_pre_maxnum;
-  int nms_post_maxnum;
-  int down_sample;
-  int output_h;
-  int output_w;
-  float x_step;
-  float y_step;
-  float x_start;
-  float y_start;
-  int map_size;
-  int task_num;
-
-  std::vector<int> class_num_pre_task;
-  std::vector<float> nms_rescale_factor;
-
-  std::unique_ptr<Iou3dNmsCuda> iou3d_nms;
-
-  float * boxes_dev = nullptr;
-  float * score_dev = nullptr;
-  int * cls_dev = nullptr;
-  int * valid_box_num = nullptr;
-  int * sorted_indices_dev = nullptr;
-  std::int64_t * keep_data_host = nullptr;
-  int * sorted_indices_host = nullptr;
-  float * boxes_host = nullptr;
-  float * score_host = nullptr;
-  int * cls_host = nullptr;
-
-  float * nms_rescale_factor_dev = nullptr;
-};
-
-#endif  // AUTOWARE__TENSORRT_BEVDET__POSTPROCESS_HPP_
diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/preprocess.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/preprocess.hpp
deleted file mode 100644
index 1c09fc925d8b5..0000000000000
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/preprocess.hpp
+++ /dev/null
@@ -1,21 +0,0 @@
-// Copyright 2024 AutoCore, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#ifndef AUTOWARE__TENSORRT_BEVDET__PREPROCESS_HPP_
-#define AUTOWARE__TENSORRT_BEVDET__PREPROCESS_HPP_
-
-#include "common.hpp"
-
-void convert_RGBHWC_to_BGRCHW(uchar * input, uchar * output, int channels, int height, int width);
-
-#endif  // AUTOWARE__TENSORRT_BEVDET__PREPROCESS_HPP_
diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/preprocess_plugin.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/preprocess_plugin.hpp
deleted file mode 100644
index ed6bbf861ab22..0000000000000
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/preprocess_plugin.hpp
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef AUTOWARE__TENSORRT_BEVDET__PREPROCESS_PLUGIN_HPP_
-#define AUTOWARE__TENSORRT_BEVDET__PREPROCESS_PLUGIN_HPP_
-
-#include <NvInfer.h>
-#include <unistd.h>
-
-#include <cmath>
-#include <iostream>
-#include <string>
-#include <vector>
-
-namespace
-{
-static const char * PRE_PLUGIN_NAME{"Preprocess"};
-static const char * PRE_PLUGIN_VERSION{"1"};
-}  // namespace
-
-namespace nvinfer1
-{
-class PreprocessPlugin : public IPluginV2DynamicExt
-{
-private:
-  const std::string name_;
-  std::string namespace_;
-  struct
-  {
-    int crop_h;
-    int crop_w;
-    float resize_radio;
-  } m_;
-
-public:
-  PreprocessPlugin() = delete;
-  PreprocessPlugin(const std::string & name, int crop_h, int crop_w, float resize_radio);
-  PreprocessPlugin(const std::string & name, const void * buffer, size_t length);
-  ~PreprocessPlugin();
-
-  // Method inherited from IPluginV2
-  const char * getPluginType() const noexcept override;
-  const char * getPluginVersion() const noexcept override;
-  int32_t getNbOutputs() const noexcept override;
-  int32_t initialize() noexcept override;
-  void terminate() noexcept override;
-  size_t getSerializationSize() const noexcept override;
-  void serialize(void * buffer) const noexcept override;
-  void destroy() noexcept override;
-  void setPluginNamespace(const char * pluginNamespace) noexcept override;
-  const char * getPluginNamespace() const noexcept override;
-
-  // Method inherited from IPluginV2Ext
-  DataType getOutputDataType(
-    int32_t index, DataType const * inputTypes, int32_t nbInputs) const noexcept override;
-  void attachToContext(
-    cudnnContext * contextCudnn, cublasContext * contextCublas,
-    IGpuAllocator * gpuAllocator) noexcept override;
-  void detachFromContext() noexcept override;
-
-  // Method inherited from IPluginV2DynamicExt
-  IPluginV2DynamicExt * clone() const noexcept override;
-  DimsExprs getOutputDimensions(
-    int32_t outputIndex, const DimsExprs * inputs, int32_t nbInputs,
-    IExprBuilder & exprBuilder) noexcept override;
-  bool supportsFormatCombination(
-    int32_t pos, const PluginTensorDesc * inOut, int32_t nbInputs,
-    int32_t nbOutputs) noexcept override;
-  void configurePlugin(
-    const DynamicPluginTensorDesc * in, int32_t nbInputs, const DynamicPluginTensorDesc * out,
-    int32_t nbOutputs) noexcept override;
-  size_t getWorkspaceSize(
-    const PluginTensorDesc * inputs, int32_t nbInputs, const PluginTensorDesc * outputs,
-    int32_t nbOutputs) const noexcept override;
-  int32_t enqueue(
-    const PluginTensorDesc * inputDesc, const PluginTensorDesc * outputDesc,
-    const void * const * inputs, void * const * outputs, void * workspace,
-    cudaStream_t stream) noexcept override;
-
-protected:
-  // To prevent compiler warnings
-  using nvinfer1::IPluginV2::enqueue;
-  using nvinfer1::IPluginV2::getOutputDimensions;
-  using nvinfer1::IPluginV2::getWorkspaceSize;
-  using nvinfer1::IPluginV2Ext::configurePlugin;
-};
-
-class PreprocessPluginCreator : public IPluginCreator
-{
-private:
-  static PluginFieldCollection fc_;
-  static std::vector<PluginField> attr_;
-  std::string namespace_;
-
-public:
-  PreprocessPluginCreator();
-  ~PreprocessPluginCreator();
-  const char * getPluginName() const noexcept override;
-  const char * getPluginVersion() const noexcept override;
-  const PluginFieldCollection * getFieldNames() noexcept override;
-  IPluginV2DynamicExt * createPlugin(
-    const char * name, const PluginFieldCollection * fc) noexcept override;
-  IPluginV2DynamicExt * deserializePlugin(
-    const char * name, const void * serialData, size_t serialLength) noexcept override;
-  void setPluginNamespace(const char * pluginNamespace) noexcept override;
-  const char * getPluginNamespace() const noexcept override;
-};
-
-}  // namespace nvinfer1
-#endif  // AUTOWARE__TENSORRT_BEVDET__PREPROCESS_PLUGIN_HPP_
diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/ros_utils.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/ros_utils.hpp
new file mode 100644
index 0000000000000..9bfd16d70c658
--- /dev/null
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/ros_utils.hpp
@@ -0,0 +1,46 @@
+// Copyright 2024 AutoCore, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef AUTOWARE__TENSORRT_BEVDET__ROS_UTILS_HPP_
+#define AUTOWARE__TENSORRT_BEVDET__ROS_UTILS_HPP_
+
+#include <autoware_perception_msgs/msg/detected_objects.hpp>
+#include <sensor_msgs/msg/camera_info.hpp>
+#include <geometry_msgs/msg/point32.hpp>
+#include <geometry_msgs/msg/quaternion.hpp>
+#include <geometry_msgs/msg/vector3.hpp>
+#include <tf2_geometry_msgs/tf2_geometry_msgs.hpp>
+#include <string>
+#include <vector>
+#include <bevdet.h>
+namespace autoware::tensorrt_bevdet
+{
+uint8_t getSemanticType(const std::string & class_name);
+
+void box3DToDetectedObjects(
+  const std::vector<Box> & boxes, autoware_perception_msgs::msg::DetectedObjects & objects,
+  const std::vector<std::string> & class_names, float score_thre, const bool has_twist);
+
+// Get the rotation and translation from a geometry_msgs::msg::TransformStamped
+void getTransform(
+  const geometry_msgs::msg::TransformStamped & transform, Eigen::Quaternion<float> & rot,
+  Eigen::Translation3f & translation);
+
+// Get the camera intrinsics from a sensor_msgs::msg::CameraInfo
+void getCameraIntrinsics(
+  const sensor_msgs::msg::CameraInfo::SharedPtr msg, Eigen::Matrix3f & intrinsics);
+
+
+} // namespace autoware::tensorrt_bevdet
+#endif  // AUTOWARE__TENSORRT_BEVDET__ROS_UTILS_HPP_
\ No newline at end of file
diff --git a/perception/autoware_tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml b/perception/autoware_tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml
index 0017ec5748cc8..bbd585833f3ea 100644
--- a/perception/autoware_tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml
+++ b/perception/autoware_tensorrt_bevdet/launch/tensorrt_bevdet.launch.xml
@@ -25,7 +25,7 @@
   <arg name="param_file" default="$(find-pkg-share autoware_tensorrt_bevdet)/config/bevdet.param.yaml"/>
 
   <!-- bevdet node -->
-  <node pkg="autoware_tensorrt_bevdet" exec="autoware_tensorrt_bevdet_node" name="tensorrt_bevdet_node" output="screen">
+  <node pkg="autoware_tensorrt_bevdet" exec="autoware_tensorrt_bevdet_node" name="tensorrt_bevdet" output="screen">
     <!-- ros topic name -->
     <remap from="~/input/topic_img_fl" to="$(var input/img_fl)"/>
     <remap from="~/input/topic_img_f" to="$(var input/img_f)"/>
diff --git a/perception/autoware_tensorrt_bevdet/package.xml b/perception/autoware_tensorrt_bevdet/package.xml
index d73a11cf6bad9..f73c16e811d77 100644
--- a/perception/autoware_tensorrt_bevdet/package.xml
+++ b/perception/autoware_tensorrt_bevdet/package.xml
@@ -2,7 +2,7 @@
 <package format="3">
   <name>autoware_tensorrt_bevdet</name>
   <version>0.0.1</version>
-  <description>tensorrt library implementation for bevdet</description>
+  <description>This package is ported version toward Autoware from bevdet_vendor</description>
 
   <author email="cynthia.liu@autocore.ai">Cynthia</author>
   <maintainer email="cynthia.liu@autocore.ai">Cynthia</maintainer>
@@ -11,24 +11,16 @@
 
   <buildtool_depend>ament_cmake_auto</buildtool_depend>
   <buildtool_depend>autoware_cmake</buildtool_depend>
-  <buildtool_depend>cudnn_cmake_module</buildtool_depend>
-  <buildtool_depend>tensorrt_cmake_module</buildtool_depend>
-
-  <buildtool_export_depend>cudnn_cmake_module</buildtool_export_depend>
-  <buildtool_export_depend>tensorrt_cmake_module</buildtool_export_depend>
-
+ 
   <depend>autoware_perception_msgs</depend>
   <depend>cv_bridge</depend>
   <depend>geometry_msgs</depend>
   <depend>libopencv-dev</depend>
-  <depend>pcl_conversions</depend>
-  <depend>pcl_ros</depend>
   <depend>rclcpp</depend>
   <depend>rclcpp_components</depend>
   <depend>sensor_msgs</depend>
-  <depend>tensorrt_common</depend>
   <depend>tf2_geometry_msgs</depend>
-  <depend>yaml-cpp</depend>
+  <depend>bevdet_vendor</depend>
 
   <test_depend>ament_lint_auto</test_depend>
   <test_depend>autoware_lint_common</test_depend>
diff --git a/perception/autoware_tensorrt_bevdet/src/alignbev_plugin.cu b/perception/autoware_tensorrt_bevdet/src/alignbev_plugin.cu
deleted file mode 100644
index 0321bf103f049..0000000000000
--- a/perception/autoware_tensorrt_bevdet/src/alignbev_plugin.cu
+++ /dev/null
@@ -1,352 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "autoware/tensorrt_bevdet/alignbev_plugin.hpp"
-#include "autoware/tensorrt_bevdet/common.hpp"
-
-#include <cuda_fp16.h>
-#include <cuda_runtime_api.h>
-
-static inline __device__ bool within_bounds_2d(int h, int w, int H, int W)
-{
-  return h >= 0 && h < H && w >= 0 && w < W;
-}
-
-template <typename T1, typename T2>
-__global__ void align_bev_kernel(
-  const int nthreads, const T1 * input, const float * trans, T2 * output, TensorDesc output_desc)
-{
-  int C = output_desc.shape[1];        // 80
-  int out_H = output_desc.shape[2];    // 128
-  int out_W = output_desc.shape[3];    // 128
-  int out_sN = output_desc.stride[0];  // 80 * 128 * 128
-  int out_sC = output_desc.stride[1];  // 128 * 128
-  int out_sH = output_desc.stride[2];  // 128
-  int out_sW = output_desc.stride[3];  // 1
-
-  CUDA_1D_KERNEL_LOOP(index, nthreads)
-  {
-    const int w = index % out_W;            //  j
-    const int h = (index / out_W) % out_H;  //  i
-    const int n = index / (out_H * out_W);  // batch
-
-    float ix =
-      trans[n * 6 + 0 * 3 + 0] * w + trans[n * 6 + 0 * 3 + 1] * h + trans[n * 6 + 0 * 3 + 2];
-    float iy =
-      trans[n * 6 + 1 * 3 + 0] * w + trans[n * 6 + 1 * 3 + 1] * h + trans[n * 6 + 1 * 3 + 2];
-
-    // NE, NW, SE, SW point
-    int ix_nw = static_cast<int>(::floor(ix));
-    int iy_nw = static_cast<int>(::floor(iy));
-    int ix_ne = ix_nw + 1;
-    int iy_ne = iy_nw;
-    int ix_sw = ix_nw;
-    int iy_sw = iy_nw + 1;
-    int ix_se = ix_nw + 1;
-    int iy_se = iy_nw + 1;
-
-    T2 nw = (ix_se - ix) * (iy_se - iy);
-    T2 ne = (ix - ix_sw) * (iy_sw - iy);
-    T2 sw = (ix_ne - ix) * (iy - iy_ne);
-    T2 se = (ix - ix_nw) * (iy - iy_nw);
-
-    // bilinear
-    auto inp_ptr_NC = input + n * out_sN;
-    auto out_ptr_NCHW = output + n * out_sN + h * out_sH + w * out_sW;
-    for (int c = 0; c < C; ++c, inp_ptr_NC += out_sC, out_ptr_NCHW += out_sC) {
-      *out_ptr_NCHW = static_cast<T2>(0);
-      if (within_bounds_2d(iy_nw, ix_nw, out_H, out_W)) {
-        *out_ptr_NCHW += static_cast<T2>(inp_ptr_NC[iy_nw * out_sH + ix_nw * out_sW]) * nw;
-      }
-      if (within_bounds_2d(iy_ne, ix_ne, out_H, out_W)) {
-        *out_ptr_NCHW += static_cast<T2>(inp_ptr_NC[iy_ne * out_sH + ix_ne * out_sW]) * ne;
-      }
-      if (within_bounds_2d(iy_sw, ix_sw, out_H, out_W)) {
-        *out_ptr_NCHW += static_cast<T2>(inp_ptr_NC[iy_sw * out_sH + ix_sw * out_sW]) * sw;
-      }
-      if (within_bounds_2d(iy_se, ix_se, out_H, out_W)) {
-        *out_ptr_NCHW += static_cast<T2>(inp_ptr_NC[iy_se * out_sH + ix_se * out_sW]) * se;
-      }
-    }
-  }
-}
-
-void create_desc(const int * dims, int nb_dims, TensorDesc & desc)
-{
-  memcpy(&desc.shape[0], dims, sizeof(int) * nb_dims);
-  desc.stride[nb_dims - 1] = 1;
-  for (int i = nb_dims - 2; i >= 0; --i) {
-    desc.stride[i] = desc.stride[i + 1] * desc.shape[i + 1];
-  }
-}
-
-namespace nvinfer1
-{
-// class AlignBEVPlugin
-AlignBEVPlugin::AlignBEVPlugin(const std::string & name) : name_(name)
-{
-}
-
-AlignBEVPlugin::AlignBEVPlugin(const std::string & name, const void * buffer, size_t length)
-: name_(name)
-{
-  memcpy(&m_, buffer, sizeof(m_));
-}
-
-AlignBEVPlugin::~AlignBEVPlugin()
-{
-}
-
-IPluginV2DynamicExt * AlignBEVPlugin::clone() const noexcept
-{
-  auto p = new AlignBEVPlugin(name_, &m_, sizeof(m_));
-  p->setPluginNamespace(namespace_.c_str());
-  return p;
-}
-
-int32_t AlignBEVPlugin::getNbOutputs() const noexcept
-{
-  return 1;
-}
-
-DataType AlignBEVPlugin::getOutputDataType(
-  int32_t index, DataType const * inputTypes, int32_t nbInputs) const noexcept
-{
-  return DataType::kFLOAT;  // FIXME
-}
-
-DimsExprs AlignBEVPlugin::getOutputDimensions(
-  int32_t outputIndex, const DimsExprs * inputs, int32_t nbInputs,
-  IExprBuilder & exprBuilder) noexcept
-{
-  return inputs[0];
-}
-
-bool AlignBEVPlugin::supportsFormatCombination(
-  int32_t pos, const PluginTensorDesc * inOut, int32_t nbInputs, int32_t nbOutputs) noexcept
-{
-  // adj_feat
-  if (pos == 0) {
-    return (inOut[pos].type == DataType::kFLOAT || inOut[pos].type == DataType::kHALF) &&
-           inOut[pos].format == TensorFormat::kLINEAR;
-  } else if (pos == 2) {  // out
-    return (inOut[pos].type == DataType::kFLOAT || inOut[pos].type == DataType::kHALF) &&
-           inOut[pos].format == TensorFormat::kLINEAR;
-  } else if (pos == 1) {  // transform
-    return inOut[pos].type == DataType::kFLOAT && inOut[pos].format == TensorFormat::kLINEAR;
-  }
-  return false;
-}
-
-size_t AlignBEVPlugin::getWorkspaceSize(
-  const PluginTensorDesc * inputs, int32_t nbInputs, const PluginTensorDesc * outputs,
-  int32_t nbOutputs) const noexcept
-{
-  return 0;
-}
-
-int32_t AlignBEVPlugin::enqueue(
-  const PluginTensorDesc * inputDesc, const PluginTensorDesc * outputDesc,
-  const void * const * inputs, void * const * outputs, void * workspace,
-  cudaStream_t stream) noexcept
-{
-  // TODO
-  // inputs[0] == adj_feat  b x 8 x 80 x 128 x 128
-  // inputs[1] == transform b x 8 x 6
-
-  int bev_channel = inputDesc[0].dims.d[2];
-  int bev_h = inputDesc[0].dims.d[3];
-  int bev_w = inputDesc[0].dims.d[4];
-  int adj_num = inputDesc[0].dims.d[1];
-
-  int output_dim[4] = {8, bev_channel, bev_h, bev_w};
-
-  TensorDesc output_desc;
-  create_desc(output_dim, 4, output_desc);
-
-  int count = 1;
-  for (int i = 0; i < 4; ++i) {
-    if (i == 1) {
-      continue;
-    }
-    count *= output_desc.shape[i];
-  }
-
-  switch (int(outputDesc[0].type)) {
-    case int(DataType::kFLOAT):
-      if (inputDesc[0].type == DataType::kFLOAT) {
-        // align : fp32, fp32;
-        align_bev_kernel<float, float><<<GET_BLOCKS(count), NUM_THREADS, 0, stream>>>(
-          count, reinterpret_cast<const float *>(inputs[0]),
-          reinterpret_cast<const float *>(inputs[1]), reinterpret_cast<float *>(outputs[0]),
-          output_desc);
-      } else {
-        // align : fp16, fp32
-        align_bev_kernel<__half, float><<<GET_BLOCKS(count), NUM_THREADS, 0, stream>>>(
-          count, reinterpret_cast<const __half *>(inputs[0]),
-          reinterpret_cast<const float *>(inputs[1]), reinterpret_cast<float *>(outputs[0]),
-          output_desc);
-      }
-      break;
-    case int(DataType::kHALF):
-      if (inputDesc[0].type == DataType::kFLOAT) {
-        // align : fp32, fp16
-        align_bev_kernel<float, __half><<<GET_BLOCKS(count), NUM_THREADS, 0, stream>>>(
-          count, reinterpret_cast<const float *>(inputs[0]),
-          reinterpret_cast<const float *>(inputs[1]), reinterpret_cast<__half *>(outputs[0]),
-          output_desc);
-      } else {
-        // align : fp16, fp16
-        align_bev_kernel<__half, __half><<<GET_BLOCKS(count), NUM_THREADS, 0, stream>>>(
-          count, reinterpret_cast<const __half *>(inputs[0]),
-          reinterpret_cast<const float *>(inputs[1]), reinterpret_cast<__half *>(outputs[0]),
-          output_desc);
-      }
-      break;
-    default:  // should NOT be here
-      RCLCPP_ERROR(rclcpp::get_logger("AlignBEVPlugin"), "\tUnsupported datatype!");
-  }
-
-  return 0;
-}
-
-void AlignBEVPlugin::destroy() noexcept
-{
-  delete this;
-  return;
-}
-
-void AlignBEVPlugin::configurePlugin(
-  const DynamicPluginTensorDesc * in, int32_t nbInputs, const DynamicPluginTensorDesc * out,
-  int32_t nbOutputs) noexcept
-{
-  return;
-}
-
-int32_t AlignBEVPlugin::initialize() noexcept
-{
-  return 0;
-}
-
-void AlignBEVPlugin::terminate() noexcept
-{
-  return;
-}
-
-size_t AlignBEVPlugin::getSerializationSize() const noexcept
-{
-  return sizeof(m_);
-}
-
-void AlignBEVPlugin::serialize(void * buffer) const noexcept
-{
-  memcpy(buffer, &m_, sizeof(m_));
-  return;
-}
-
-void AlignBEVPlugin::setPluginNamespace(const char * pluginNamespace) noexcept
-{
-  namespace_ = pluginNamespace;
-  return;
-}
-
-const char * AlignBEVPlugin::getPluginNamespace() const noexcept
-{
-  return namespace_.c_str();
-}
-
-const char * AlignBEVPlugin::getPluginType() const noexcept
-{
-  return ALIGN_PLUGIN_NAME;
-}
-
-const char * AlignBEVPlugin::getPluginVersion() const noexcept
-{
-  return ALIGN_PLUGIN_VERSION;
-}
-
-void AlignBEVPlugin::attachToContext(
-  cudnnContext * contextCudnn, cublasContext * contextCublas, IGpuAllocator * gpuAllocator) noexcept
-{
-  return;
-}
-
-void AlignBEVPlugin::detachFromContext() noexcept
-{
-  return;
-}
-
-// class AlignBEVPluginCreator
-PluginFieldCollection AlignBEVPluginCreator::fc_{};
-std::vector<PluginField> AlignBEVPluginCreator::attr_;
-
-AlignBEVPluginCreator::AlignBEVPluginCreator()
-{
-  attr_.clear();
-  fc_.nbFields = attr_.size();
-  fc_.fields = attr_.data();
-}
-
-AlignBEVPluginCreator::~AlignBEVPluginCreator()
-{
-}
-
-IPluginV2DynamicExt * AlignBEVPluginCreator::createPlugin(
-  const char * name, const PluginFieldCollection * fc) noexcept
-{
-  // const PluginField *fields = fc->fields;
-  AlignBEVPlugin * pObj = new AlignBEVPlugin(name);
-  pObj->setPluginNamespace(namespace_.c_str());
-  return pObj;
-}
-
-IPluginV2DynamicExt * AlignBEVPluginCreator::deserializePlugin(
-  const char * name, const void * serialData, size_t serialLength) noexcept
-{
-  AlignBEVPlugin * pObj = new AlignBEVPlugin(name, serialData, serialLength);
-  pObj->setPluginNamespace(namespace_.c_str());
-  return pObj;
-}
-
-void AlignBEVPluginCreator::setPluginNamespace(const char * pluginNamespace) noexcept
-{
-  namespace_ = pluginNamespace;
-  return;
-}
-
-const char * AlignBEVPluginCreator::getPluginNamespace() const noexcept
-{
-  return namespace_.c_str();
-}
-
-const char * AlignBEVPluginCreator::getPluginName() const noexcept
-{
-  return ALIGN_PLUGIN_NAME;
-}
-
-const char * AlignBEVPluginCreator::getPluginVersion() const noexcept
-{
-  return ALIGN_PLUGIN_VERSION;
-}
-
-const PluginFieldCollection * AlignBEVPluginCreator::getFieldNames() noexcept
-{
-  return &fc_;
-}
-
-REGISTER_TENSORRT_PLUGIN(AlignBEVPluginCreator);
-
-}  // namespace nvinfer1
diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
deleted file mode 100644
index 0296a829f1e35..0000000000000
--- a/perception/autoware_tensorrt_bevdet/src/bevdet.cpp
+++ /dev/null
@@ -1,687 +0,0 @@
-// Copyright 2024 AutoCore, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// cspell:ignore BEVDET, thre, TRTBEV, bevdet, caminfo, intrin, Ncams, bevfeat, bevpool, maxnum,
-// zgrid cspell:ignore gridbev, egobev, adjgrid, currgrid
-#include "autoware/tensorrt_bevdet/bevdet.hpp"
-
-#include "autoware/tensorrt_bevdet/alignbev_plugin.hpp"
-#include "autoware/tensorrt_bevdet/bevpool_plugin.hpp"
-#include "autoware/tensorrt_bevdet/gatherbev_plugin.hpp"
-#include "autoware/tensorrt_bevdet/preprocess_plugin.hpp"
-
-#include <thrust/functional.h>
-#include <thrust/sort.h>
-
-#include <chrono>
-#include <cstdio>
-#include <fstream>
-#include <iostream>
-
-using std::chrono::duration;
-using std::chrono::high_resolution_clock;
-
-namespace autoware::tensorrt_bevdet
-{
-BEVDet::BEVDet(
-  const std::string & config_file, int n_img, std::vector<Eigen::Matrix3f> cams_intrin,
-  std::vector<Eigen::Quaternion<float>> cams2ego_rot,
-  std::vector<Eigen::Translation3f> cams2ego_trans, const std::string & onnx_file,
-  const std::string & engine_file)
-: cams_intrin_(cams_intrin), cams2ego_rot_(cams2ego_rot), cams2ego_trans_(cams2ego_trans)
-{
-  initParams(config_file);
-  if (n_img != n_img_) {
-    RCLCPP_ERROR(
-      rclcpp::get_logger("BEVDet"), "BEVDet need %d images, but given %d images!", n_img_, n_img);
-  }
-  auto start = high_resolution_clock::now();
-
-  std::vector<int> ranks_bev;
-  std::vector<int> ranks_depth;
-  std::vector<int> ranks_feat;
-  std::vector<int> interval_starts;
-  std::vector<int> interval_lengths;
-
-  initViewTransformer(ranks_bev, ranks_depth, ranks_feat, interval_starts, interval_lengths);
-  auto end = high_resolution_clock::now();
-  duration<float> t = end - start;
-  RCLCPP_INFO(
-    rclcpp::get_logger("BEVDet"), "InitVewTransformer cost time : %.4lf ms", t.count() * 1000);
-
-  if (access(engine_file.c_str(), F_OK) == 0) {
-    RCLCPP_INFO(rclcpp::get_logger("BEVDet"), "Inference engine prepared.");
-  } else {
-    // onnx to engine
-    RCLCPP_WARN(
-      rclcpp::get_logger("BEVDet"), "Could not find %s, try making TensorRT engine from onnx",
-      engine_file.c_str());
-    exportEngine(onnx_file, engine_file);
-  }
-  initEngine(engine_file);  // FIXME
-  mallocDeviceMemory();
-
-  if (use_adj_) {
-    adj_frame_ptr_.reset(new AdjFrame(adj_num_, trt_buffer_sizes_[buffer_map_["curr_bevfeat"]]));
-  }
-
-  cam_params_host_.resize(n_img_ * cam_params_size_);
-
-  CHECK_CUDA(cudaMemcpy(
-    trt_buffer_dev_[buffer_map_["ranks_bev"]], ranks_bev.data(), valid_feat_num_ * sizeof(int),
-    cudaMemcpyHostToDevice));
-  CHECK_CUDA(cudaMemcpy(
-    trt_buffer_dev_[buffer_map_["ranks_depth"]], ranks_depth.data(), valid_feat_num_ * sizeof(int),
-    cudaMemcpyHostToDevice));
-  CHECK_CUDA(cudaMemcpy(
-    trt_buffer_dev_[buffer_map_["ranks_feat"]], ranks_feat.data(), valid_feat_num_ * sizeof(int),
-    cudaMemcpyHostToDevice));
-  CHECK_CUDA(cudaMemcpy(
-    trt_buffer_dev_[buffer_map_["interval_starts"]], interval_starts.data(),
-    unique_bev_num_ * sizeof(int), cudaMemcpyHostToDevice));
-  CHECK_CUDA(cudaMemcpy(
-    trt_buffer_dev_[buffer_map_["interval_lengths"]], interval_lengths.data(),
-    unique_bev_num_ * sizeof(int), cudaMemcpyHostToDevice));
-
-  CHECK_CUDA(cudaMemcpy(
-    trt_buffer_dev_[buffer_map_["mean"]], mean_.data(), 3 * sizeof(float), cudaMemcpyHostToDevice));
-  CHECK_CUDA(cudaMemcpy(
-    trt_buffer_dev_[buffer_map_["std"]], std_.data(), 3 * sizeof(float), cudaMemcpyHostToDevice));
-}
-
-void BEVDet::initCamParams(
-  const std::vector<Eigen::Quaternion<float>> & curr_cams2ego_rot,
-  const std::vector<Eigen::Translation3f> & curr_cams2ego_trans,
-  const std::vector<Eigen::Matrix3f> & curr_cams_intrin)
-{
-  for (int i = 0; i < n_img_; i++) {
-    cam_params_host_[i * cam_params_size_ + 0] = curr_cams_intrin[i](0, 0);
-    cam_params_host_[i * cam_params_size_ + 1] = curr_cams_intrin[i](1, 1);
-    cam_params_host_[i * cam_params_size_ + 2] = curr_cams_intrin[i](0, 2);
-    cam_params_host_[i * cam_params_size_ + 3] = curr_cams_intrin[i](1, 2);
-    cam_params_host_[i * cam_params_size_ + 4] = post_rot_(0, 0);
-    cam_params_host_[i * cam_params_size_ + 5] = post_rot_(0, 1);
-    cam_params_host_[i * cam_params_size_ + 6] = post_trans_.translation()(0);
-    cam_params_host_[i * cam_params_size_ + 7] = post_rot_(1, 0);
-    cam_params_host_[i * cam_params_size_ + 8] = post_rot_(1, 1);
-    cam_params_host_[i * cam_params_size_ + 9] = post_trans_.translation()(1);
-    cam_params_host_[i * cam_params_size_ + 10] = 1.f;  // bda 0 0
-    cam_params_host_[i * cam_params_size_ + 11] = 0.f;  // bda 0 1
-    cam_params_host_[i * cam_params_size_ + 12] = 0.f;  // bda 1 0
-    cam_params_host_[i * cam_params_size_ + 13] = 1.f;  // bda 1 1
-    cam_params_host_[i * cam_params_size_ + 14] = 1.f;  // bda 2 2
-    cam_params_host_[i * cam_params_size_ + 15] = curr_cams2ego_rot[i].matrix()(0, 0);
-    cam_params_host_[i * cam_params_size_ + 16] = curr_cams2ego_rot[i].matrix()(0, 1);
-    cam_params_host_[i * cam_params_size_ + 17] = curr_cams2ego_rot[i].matrix()(0, 2);
-    cam_params_host_[i * cam_params_size_ + 18] = curr_cams2ego_trans[i].translation()(0);
-    cam_params_host_[i * cam_params_size_ + 19] = curr_cams2ego_rot[i].matrix()(1, 0);
-    cam_params_host_[i * cam_params_size_ + 20] = curr_cams2ego_rot[i].matrix()(1, 1);
-    cam_params_host_[i * cam_params_size_ + 21] = curr_cams2ego_rot[i].matrix()(1, 2);
-    cam_params_host_[i * cam_params_size_ + 22] = curr_cams2ego_trans[i].translation()(1);
-    cam_params_host_[i * cam_params_size_ + 23] = curr_cams2ego_rot[i].matrix()(2, 0);
-    cam_params_host_[i * cam_params_size_ + 24] = curr_cams2ego_rot[i].matrix()(2, 1);
-    cam_params_host_[i * cam_params_size_ + 25] = curr_cams2ego_rot[i].matrix()(2, 2);
-    cam_params_host_[i * cam_params_size_ + 26] = curr_cams2ego_trans[i].translation()(2);
-  }
-  CHECK_CUDA(cudaMemcpy(
-    trt_buffer_dev_[buffer_map_["cam_params"]], cam_params_host_.data(),
-    trt_buffer_sizes_[buffer_map_["cam_params"]], cudaMemcpyHostToDevice));
-}
-
-void BEVDet::initParams(const std::string & config_file)
-{
-  mean_ = std::vector<float>(3);
-  std_ = std::vector<float>(3);
-
-  YAML::Node model_config = YAML::LoadFile(config_file);
-  n_img_ = model_config["data_config"]["Ncams"].as<int>();
-  src_img_h_ = model_config["data_config"]["src_size"][0].as<int>();
-  src_img_w_ = model_config["data_config"]["src_size"][1].as<int>();
-  input_img_h_ = model_config["data_config"]["input_size"][0].as<int>();
-  input_img_w_ = model_config["data_config"]["input_size"][1].as<int>();
-  resize_radio_ = model_config["data_config"]["resize_radio"].as<float>();
-  crop_h_ = model_config["data_config"]["crop"][0].as<int>();
-  crop_w_ = model_config["data_config"]["crop"][1].as<int>();
-  mean_[0] = model_config["mean"][0].as<float>();
-  mean_[1] = model_config["mean"][1].as<float>();
-  mean_[2] = model_config["mean"][2].as<float>();
-  std_[0] = model_config["std"][0].as<float>();
-  std_[1] = model_config["std"][1].as<float>();
-  std_[2] = model_config["std"][2].as<float>();
-  down_sample_ = model_config["model"]["down_sample"].as<int>();
-  depth_start_ = model_config["grid_config"]["depth"][0].as<float>();
-  depth_end_ = model_config["grid_config"]["depth"][1].as<float>();
-  depth_step_ = model_config["grid_config"]["depth"][2].as<float>();
-  x_start_ = model_config["grid_config"]["x"][0].as<float>();
-  x_end_ = model_config["grid_config"]["x"][1].as<float>();
-  x_step_ = model_config["grid_config"]["x"][2].as<float>();
-  y_start_ = model_config["grid_config"]["y"][0].as<float>();
-  y_end_ = model_config["grid_config"]["y"][1].as<float>();
-  y_step_ = model_config["grid_config"]["y"][2].as<float>();
-  z_start_ = model_config["grid_config"]["z"][0].as<float>();
-  z_end_ = model_config["grid_config"]["z"][1].as<float>();
-  z_step_ = model_config["grid_config"]["z"][2].as<float>();
-  bevpool_channel_ = model_config["model"]["bevpool_channels"].as<int>();
-  nms_pre_maxnum_ = model_config["test_cfg"]["max_per_img"].as<int>();
-  nms_post_maxnum_ = model_config["test_cfg"]["post_max_size"].as<int>();
-  score_thresh_ = model_config["test_cfg"]["score_threshold"].as<float>();
-  nms_overlap_thresh_ = model_config["test_cfg"]["nms_thr"][0].as<float>();
-  use_depth_ = model_config["use_depth"].as<bool>();
-  use_adj_ = model_config["use_adj"].as<bool>();
-  transform_size_ = model_config["transform_size"].as<int>();
-  cam_params_size_ = model_config["cam_params_size"].as<int>();
-
-  std::vector<std::vector<float>> nms_factor_temp =
-    model_config["test_cfg"]["nms_rescale_factor"].as<std::vector<std::vector<float>>>();
-  nms_rescale_factor_.clear();
-  for (const auto & task_factors : nms_factor_temp) {
-    for (float factor : task_factors) {
-      nms_rescale_factor_.push_back(factor);
-    }
-  }
-
-  std::vector<std::vector<std::string>> class_name_pre_task;
-  class_num_ = 0;
-  YAML::Node tasks = model_config["model"]["tasks"];
-  class_num_pre_task_ = std::vector<int>();
-  for (auto it : tasks) {
-    int num = it["num_class"].as<int>();
-    class_num_pre_task_.push_back(num);
-    class_num_ += num;
-    class_name_pre_task.push_back(it["class_names"].as<std::vector<std::string>>());
-  }
-
-  YAML::Node common_head_channel = model_config["model"]["common_head"]["channels"];
-  YAML::Node common_head_name = model_config["model"]["common_head"]["names"];
-  for (size_t i = 0; i < common_head_channel.size(); i++) {
-    out_num_task_head_[common_head_name[i].as<std::string>()] = common_head_channel[i].as<int>();
-  }
-
-  feat_h_ = input_img_h_ / down_sample_;
-  feat_w_ = input_img_w_ / down_sample_;
-  depth_num_ = (depth_end_ - depth_start_) / depth_step_;
-  xgrid_num_ = (x_end_ - x_start_) / x_step_;
-  ygrid_num_ = (y_end_ - y_start_) / y_step_;
-  zgrid_num_ = (z_end_ - z_start_) / z_step_;
-  bev_h_ = ygrid_num_;
-  bev_w_ = xgrid_num_;
-
-  post_rot_ << resize_radio_, 0, 0, 0, resize_radio_, 0, 0, 0, 1;
-  post_trans_.translation() << -crop_w_, -crop_h_, 0;
-
-  adj_num_ = 0;
-  if (use_adj_) {
-    adj_num_ = model_config["adj_num"].as<int>();
-  }
-
-  postprocess_ptr_.reset(new PostprocessGPU(
-    class_num_, score_thresh_, nms_overlap_thresh_, nms_pre_maxnum_, nms_post_maxnum_, down_sample_,
-    bev_h_, bev_w_, x_step_, y_step_, x_start_, y_start_, class_num_pre_task_,
-    nms_rescale_factor_));
-}
-
-void BEVDet::mallocDeviceMemory()
-{
-  trt_buffer_sizes_.resize(trt_engine_->getNbBindings());
-  trt_buffer_dev_ = reinterpret_cast<void **>(new void *[trt_engine_->getNbBindings()]);
-  for (int i = 0; i < trt_engine_->getNbBindings(); i++) {
-    nvinfer1::Dims32 dim = trt_context_->getBindingDimensions(i);
-    size_t size = 1;
-    for (int j = 0; j < dim.nbDims; j++) {
-      size *= dim.d[j];
-    }
-    size *= dataTypeToSize(trt_engine_->getBindingDataType(i));
-    trt_buffer_sizes_[i] = size;
-    CHECK_CUDA(cudaMalloc(&trt_buffer_dev_[i], size));
-  }
-
-  RCLCPP_INFO(rclcpp::get_logger("BEVDet"), "img num binding : %d", trt_engine_->getNbBindings());
-
-  post_buffer_ = reinterpret_cast<void **>(new void *[class_num_pre_task_.size() * 6]);
-  for (size_t i = 0; i < class_num_pre_task_.size(); i++) {
-    post_buffer_[i * 6 + 0] = trt_buffer_dev_[buffer_map_["reg_" + std::to_string(i)]];
-    post_buffer_[i * 6 + 1] = trt_buffer_dev_[buffer_map_["height_" + std::to_string(i)]];
-    post_buffer_[i * 6 + 2] = trt_buffer_dev_[buffer_map_["dim_" + std::to_string(i)]];
-    post_buffer_[i * 6 + 3] = trt_buffer_dev_[buffer_map_["rot_" + std::to_string(i)]];
-    post_buffer_[i * 6 + 4] = trt_buffer_dev_[buffer_map_["vel_" + std::to_string(i)]];
-    post_buffer_[i * 6 + 5] = trt_buffer_dev_[buffer_map_["heatmap_" + std::to_string(i)]];
-  }
-
-  return;
-}
-
-void BEVDet::initViewTransformer(
-  std::vector<int> & ranks_bev, std::vector<int> & ranks_depth, std::vector<int> & ranks_feat,
-  std::vector<int> & interval_starts, std::vector<int> & interval_lengths)
-{
-  int num_points = n_img_ * depth_num_ * feat_h_ * feat_w_;
-  std::vector<Eigen::Vector3f> frustum(num_points);
-
-  for (int i = 0; i < n_img_; i++) {
-    for (int d_ = 0; d_ < depth_num_; d_++) {
-      for (int h_ = 0; h_ < feat_h_; h_++) {
-        for (int w_ = 0; w_ < feat_w_; w_++) {
-          int offset =
-            i * depth_num_ * feat_h_ * feat_w_ + d_ * feat_h_ * feat_w_ + h_ * feat_w_ + w_;
-          frustum[offset].x() = static_cast<float>(w_) * (input_img_w_ - 1) / (feat_w_ - 1);
-          frustum[offset].y() = static_cast<float>(h_) * (input_img_h_ - 1) / (feat_h_ - 1);
-          frustum[offset].z() = static_cast<float>(d_) * depth_step_ + depth_start_;
-
-          // eliminate post transformation
-          frustum[offset] -= post_trans_.translation();
-          frustum[offset] = post_rot_.inverse() * frustum[offset];
-          //
-          frustum[offset].x() *= frustum[offset].z();
-          frustum[offset].y() *= frustum[offset].z();
-          // img to ego -> rot -> trans
-          frustum[offset] = cams2ego_rot_[i] * cams_intrin_[i].inverse() * frustum[offset] +
-                            cams2ego_trans_[i].translation();
-
-          // voxelization
-          frustum[offset] -= Eigen::Vector3f(x_start_, y_start_, z_start_);
-          frustum[offset].x() = static_cast<int>(frustum[offset].x() / x_step_);
-          frustum[offset].y() = static_cast<int>(frustum[offset].y() / y_step_);
-          frustum[offset].z() = static_cast<int>(frustum[offset].z() / z_step_);
-        }
-      }
-    }
-  }
-
-  std::vector<int> _ranks_depth(num_points);
-  std::vector<int> _ranks_feat(num_points);
-
-  for (int i = 0; i < num_points; i++) {
-    _ranks_depth[i] = i;
-  }
-  for (int i = 0; i < n_img_; i++) {
-    for (int d_ = 0; d_ < depth_num_; d_++) {
-      for (int u = 0; u < feat_h_ * feat_w_; u++) {
-        int offset = i * (depth_num_ * feat_h_ * feat_w_) + d_ * (feat_h_ * feat_w_) + u;
-        _ranks_feat[offset] = i * feat_h_ * feat_w_ + u;
-      }
-    }
-  }
-
-  std::vector<int> kept;
-  for (int i = 0; i < num_points; i++) {
-    if (
-      static_cast<int>(frustum[i].x()) >= 0 && static_cast<int>(frustum[i].x()) < xgrid_num_ &&
-      static_cast<int>(frustum[i].y()) >= 0 && static_cast<int>(frustum[i].y()) < ygrid_num_ &&
-      static_cast<int>(frustum[i].z()) >= 0 && static_cast<int>(frustum[i].z()) < zgrid_num_) {
-      kept.push_back(i);
-    }
-  }
-
-  valid_feat_num_ = kept.size();
-  ;
-  ranks_depth.resize(valid_feat_num_);
-  ranks_feat.resize(valid_feat_num_);
-  ranks_bev.resize(valid_feat_num_);
-  std::vector<int> order(valid_feat_num_);
-
-  for (int i = 0; i < valid_feat_num_; i++) {
-    Eigen::Vector3f & p = frustum[kept[i]];
-    ranks_bev[i] = static_cast<int>(p.z()) * xgrid_num_ * ygrid_num_ +
-                   static_cast<int>(p.y()) * xgrid_num_ + static_cast<int>(p.x());
-    order[i] = i;
-  }
-
-  thrust::sort_by_key(ranks_bev.begin(), ranks_bev.end(), order.begin());
-  for (int i = 0; i < valid_feat_num_; i++) {
-    ranks_depth[i] = _ranks_depth[kept[order[i]]];
-    ranks_feat[i] = _ranks_feat[kept[order[i]]];
-  }
-
-  interval_starts.push_back(0);
-  int len = 1;
-  for (int i = 1; i < valid_feat_num_; i++) {
-    if (ranks_bev[i] != ranks_bev[i - 1]) {
-      interval_starts.push_back(i);
-      interval_lengths.push_back(len);
-      len = 1;
-    } else {
-      len++;
-    }
-  }
-
-  interval_lengths.push_back(len);
-  unique_bev_num_ = interval_lengths.size();
-
-  RCLCPP_INFO(rclcpp::get_logger("BEVDet"), "valid_feat_num: %d", valid_feat_num_);
-  RCLCPP_INFO(rclcpp::get_logger("BEVDet"), "unique_bev_num: %d", unique_bev_num_);
-}
-
-void print_dim(nvinfer1::Dims dim, const std::string & name)
-{
-  std::ostringstream oss;
-  oss << name << " : ";
-  for (auto i = 0; i < dim.nbDims; i++) {
-    oss << dim.d[i] << ' ';
-  }
-  RCLCPP_INFO(rclcpp::get_logger("BEVDet"), "%s", oss.str().c_str());
-}
-
-int BEVDet::initEngine(const std::string & engine_file)
-{
-  if (deserializeTRTEngine(engine_file, &trt_engine_)) {
-    return EXIT_FAILURE;
-  }
-
-  if (trt_engine_ == nullptr) {
-    RCLCPP_ERROR(rclcpp::get_logger("BEVDet"), "Failed to deserialize engine file!");
-    return EXIT_FAILURE;
-  }
-  trt_context_ = trt_engine_->createExecutionContext();
-
-  if (trt_context_ == nullptr) {
-    RCLCPP_ERROR(rclcpp::get_logger("BEVDet"), "Failed to create TensorRT Execution Context!");
-    return EXIT_FAILURE;
-  }
-
-  // set bindings
-  std::vector<nvinfer1::Dims32> shapes{
-    {4, {n_img_, 3, src_img_h_, src_img_w_ / 4}},
-    {1, {3}},
-    {1, {3}},
-    {3, {1, n_img_, cam_params_size_}},
-    {1, {valid_feat_num_}},
-    {1, {valid_feat_num_}},
-    {1, {valid_feat_num_}},
-    {1, {unique_bev_num_}},
-    {1, {unique_bev_num_}},
-    {5, {1, adj_num_, bevpool_channel_, bev_h_, bev_w_}},
-    {3, {1, adj_num_, transform_size_}},
-    {2, {1, 1}}};
-
-  for (size_t i = 0; i < shapes.size(); i++) {
-    trt_context_->setBindingDimensions(i, shapes[i]);
-  }
-
-  buffer_map_.clear();
-  for (auto i = 0; i < trt_engine_->getNbBindings(); i++) {
-    auto dim = trt_context_->getBindingDimensions(i);
-    auto name = trt_engine_->getBindingName(i);
-    buffer_map_[name] = i;
-    print_dim(dim, name);
-  }
-
-  return EXIT_SUCCESS;
-}
-
-int BEVDet::deserializeTRTEngine(
-  const std::string & engine_file, nvinfer1::ICudaEngine ** engine_ptr)
-{
-  std::stringstream engine_stream;
-  engine_stream.seekg(0, engine_stream.beg);
-
-  std::ifstream file(engine_file);
-  engine_stream << file.rdbuf();
-  file.close();
-
-  nvinfer1::IRuntime * runtime = nvinfer1::createInferRuntime(g_logger_);
-  if (runtime == nullptr) {
-    std::string msg("Failed to build runtime parser!");
-    g_logger_.log(nvinfer1::ILogger::Severity::kERROR, msg.c_str());
-    return EXIT_FAILURE;
-  }
-  engine_stream.seekg(0, std::ios::end);
-  const int engine_size = engine_stream.tellg();
-
-  engine_stream.seekg(0, std::ios::beg);
-  void * engine_str = malloc(engine_size);
-  engine_stream.read(reinterpret_cast<char *>(engine_str), engine_size);
-
-  nvinfer1::ICudaEngine * engine = runtime->deserializeCudaEngine(engine_str, engine_size, NULL);
-  if (engine == nullptr) {
-    std::string msg("Failed to build engine parser!");
-    g_logger_.log(nvinfer1::ILogger::Severity::kERROR, msg.c_str());
-    return EXIT_FAILURE;
-  }
-  *engine_ptr = engine;
-  for (int bi = 0; bi < engine->getNbBindings(); bi++) {
-    if (engine->bindingIsInput(bi) == true) {
-      RCLCPP_INFO(
-        rclcpp::get_logger("BEVDet"), "Binding %d (%s): Input.", bi, engine->getBindingName(bi));
-    } else {
-      RCLCPP_INFO(
-        rclcpp::get_logger("BEVDet"), "Binding %d (%s): Output.", bi, engine->getBindingName(bi));
-    }
-  }
-  return EXIT_SUCCESS;
-}
-
-void BEVDet::getAdjBEVFeature(
-  const std::string & curr_scene_token, const Eigen::Quaternion<float> & ego2global_rot,
-  const Eigen::Translation3f & ego2global_trans)
-{
-  int flag = 1;
-  if (adj_frame_ptr_->lastScenesToken() != curr_scene_token) {
-    adj_frame_ptr_->reset();
-    flag = 0;
-  }
-
-  // the smaller the idx, the newer th adj_bevfeat
-  for (int i = 0; i < adj_num_; i++) {
-    const void * adj_buffer = adj_frame_ptr_->getFrameBuffer(i);
-
-    size_t buf_size = trt_buffer_sizes_[buffer_map_["adj_feats"]] / adj_num_;
-
-    CHECK_CUDA(cudaMemcpy(
-      reinterpret_cast<char *>(trt_buffer_dev_[buffer_map_["adj_feats"]]) + i * buf_size,
-      adj_buffer, buf_size, cudaMemcpyDeviceToDevice));
-
-    Eigen::Quaternion<float> adj_ego2global_rot;
-    Eigen::Translation3f adj_ego2global_trans;
-    adj_frame_ptr_->getEgo2Global(i, adj_ego2global_rot, adj_ego2global_trans);
-
-    getCurr2AdjTransform(
-      ego2global_rot, adj_ego2global_rot, ego2global_trans, adj_ego2global_trans,
-      reinterpret_cast<float *>(trt_buffer_dev_[buffer_map_["transforms"]]) + i * transform_size_);
-  }
-  CHECK_CUDA(cudaMemcpy(
-    trt_buffer_dev_[buffer_map_["flag"]], &flag, trt_buffer_sizes_[buffer_map_["flag"]],
-    cudaMemcpyHostToDevice));
-}
-
-void BEVDet::getCurr2AdjTransform(
-  const Eigen::Quaternion<float> & curr_ego2global_rot,
-  const Eigen::Quaternion<float> & adj_ego2global_rot,
-  const Eigen::Translation3f & curr_ego2global_trans,
-  const Eigen::Translation3f & adj_ego2global_trans, float * transform_dev)
-{
-  Eigen::Matrix4f curr_e2g_transform;
-  Eigen::Matrix4f adj_e2g_transform;
-
-  for (int i = 0; i < 3; i++) {
-    for (int j = 0; j < 3; j++) {
-      curr_e2g_transform(i, j) = curr_ego2global_rot.matrix()(i, j);
-      adj_e2g_transform(i, j) = adj_ego2global_rot.matrix()(i, j);
-    }
-  }
-  for (int i = 0; i < 3; i++) {
-    curr_e2g_transform(i, 3) = curr_ego2global_trans.vector()(i);
-    adj_e2g_transform(i, 3) = adj_ego2global_trans.vector()(i);
-
-    curr_e2g_transform(3, i) = 0.f;
-    adj_e2g_transform(3, i) = 0.f;
-  }
-  curr_e2g_transform(3, 3) = 1.f;
-  adj_e2g_transform(3, 3) = 1.f;
-
-  Eigen::Matrix4f currEgo2adjEgo = adj_e2g_transform.inverse() * curr_e2g_transform;
-  Eigen::Matrix3f currEgo2adjEgo_2d;
-  for (int i = 0; i < 2; i++) {
-    for (int j = 0; j < 2; j++) {
-      currEgo2adjEgo_2d(i, j) = currEgo2adjEgo(i, j);
-    }
-  }
-  currEgo2adjEgo_2d(2, 0) = 0.f;
-  currEgo2adjEgo_2d(2, 1) = 0.f;
-  currEgo2adjEgo_2d(2, 2) = 1.f;
-  currEgo2adjEgo_2d(0, 2) = currEgo2adjEgo(0, 3);
-  currEgo2adjEgo_2d(1, 2) = currEgo2adjEgo(1, 3);
-
-  Eigen::Matrix3f gridbev2egobev;
-  gridbev2egobev(0, 0) = x_step_;
-  gridbev2egobev(1, 1) = y_step_;
-  gridbev2egobev(0, 2) = x_start_;
-  gridbev2egobev(1, 2) = y_start_;
-  gridbev2egobev(2, 2) = 1.f;
-
-  gridbev2egobev(0, 1) = 0.f;
-  gridbev2egobev(1, 0) = 0.f;
-  gridbev2egobev(2, 0) = 0.f;
-  gridbev2egobev(2, 1) = 0.f;
-
-  Eigen::Matrix3f currgrid2adjgrid = gridbev2egobev.inverse() * currEgo2adjEgo_2d * gridbev2egobev;
-
-  CHECK_CUDA(cudaMemcpy(
-    transform_dev, Eigen::Matrix3f(currgrid2adjgrid.transpose()).data(),
-    transform_size_ * sizeof(float), cudaMemcpyHostToDevice));
-}
-
-int BEVDet::doInfer(
-  const camsData & cam_data, std::vector<Box> & out_detections, float & cost_time, int idx)
-{
-  RCLCPP_DEBUG(rclcpp::get_logger("BEVDet"), "---------%d---------", idx + 1);
-
-  auto start = high_resolution_clock::now();
-  CHECK_CUDA(cudaMemcpy(
-    trt_buffer_dev_[buffer_map_["images"]], cam_data.imgs_dev,
-    trt_buffer_sizes_[buffer_map_["images"]], cudaMemcpyDeviceToDevice));
-
-  initCamParams(
-    cam_data.param.cams2ego_rot, cam_data.param.cams2ego_trans, cam_data.param.cams_intrin);
-
-  getAdjBEVFeature(
-    cam_data.param.scene_token, cam_data.param.ego2global_rot, cam_data.param.ego2global_trans);
-
-  if (!trt_context_->executeV2(trt_buffer_dev_)) {
-    RCLCPP_ERROR(rclcpp::get_logger("BEVDet"), "BEVDet forward failing!");
-  }
-
-  adj_frame_ptr_->saveFrameBuffer(
-    trt_buffer_dev_[buffer_map_["curr_bevfeat"]], cam_data.param.scene_token,
-    cam_data.param.ego2global_rot, cam_data.param.ego2global_trans);
-
-  auto end = high_resolution_clock::now();
-
-  postprocess_ptr_->DoPostprocess(post_buffer_, out_detections);
-  CHECK_CUDA(cudaDeviceSynchronize());
-
-  auto post_end = high_resolution_clock::now();
-
-  duration<double> infer_t = post_end - start;
-  duration<double> engine_t = end - start;
-  duration<double> post_t = post_end - end;
-
-  cost_time = infer_t.count() * 1000;
-  RCLCPP_DEBUG(rclcpp::get_logger("BEVDet"), "Inference time: %.5lf ms", engine_t.count() * 1000);
-  RCLCPP_DEBUG(rclcpp::get_logger("BEVDet"), "Postprocess time: %.5lf ms", post_t.count() * 1000);
-  RCLCPP_DEBUG(rclcpp::get_logger("BEVDet"), "Total time: %.5lf ms", infer_t.count() * 1000);
-  RCLCPP_DEBUG(rclcpp::get_logger("BEVDet"), "Detect %ld objects", out_detections.size());
-
-  return EXIT_SUCCESS;
-}
-
-void BEVDet::exportEngine(const std::string & onnxFile, const std::string & trtFile)
-{
-  CHECK_CUDA(cudaSetDevice(0));
-  nvinfer1::ICudaEngine * engine = nullptr;
-  std::vector<nvinfer1::Dims32> min_shapes{
-    {4, {6, 3, 900, 400}}, {1, {3}},      {1, {3}},    {3, {1, 6, 27}}, {1, {200000}},
-    {1, {200000}},         {1, {200000}}, {1, {8000}}, {1, {8000}},     {5, {1, 8, 80, 128, 128}},
-    {3, {1, 8, 6}},        {2, {1, 1}}};
-
-  std::vector<nvinfer1::Dims32> opt_shapes{
-    {4, {6, 3, 900, 400}}, {1, {3}},      {1, {3}},     {3, {1, 6, 27}}, {1, {356760}},
-    {1, {356760}},         {1, {356760}}, {1, {13360}}, {1, {13360}},    {5, {1, 8, 80, 128, 128}},
-    {3, {1, 8, 6}},        {2, {1, 1}}};
-
-  std::vector<nvinfer1::Dims32> max_shapes{
-    {4, {6, 3, 900, 400}}, {1, {3}},      {1, {3}},     {3, {1, 6, 27}}, {1, {370000}},
-    {1, {370000}},         {1, {370000}}, {1, {14000}}, {1, {14000}},    {5, {1, 8, 80, 128, 128}},
-    {3, {1, 8, 6}},        {2, {1, 1}}};
-  nvinfer1::IBuilder * builder = nvinfer1::createInferBuilder(g_logger_);
-  nvinfer1::INetworkDefinition * network =
-    builder->createNetworkV2(1U << int(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH));
-  nvinfer1::IOptimizationProfile * profile = builder->createOptimizationProfile();
-  nvinfer1::IBuilderConfig * config = builder->createBuilderConfig();
-  config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, 3UL << 32UL);
-
-  config->setFlag(nvinfer1::BuilderFlag::kFP16);
-
-  nvonnxparser::IParser * parser = nvonnxparser::createParser(*network, g_logger_);
-
-  if (!parser->parseFromFile(onnxFile.c_str(), static_cast<int>(g_logger_.reportable_severity))) {
-    RCLCPP_ERROR(rclcpp::get_logger("BEVDet"), "Failed parsing .onnx file!");
-    for (int i = 0; i < parser->getNbErrors(); ++i) {
-      auto * error = parser->getError(i);
-      RCLCPP_ERROR(
-        rclcpp::get_logger("BEVDet"), "Error code: %d, Description: %s",
-        static_cast<int>(error->code()), error->desc());
-    }
-
-    return;
-  }
-  RCLCPP_INFO(rclcpp::get_logger("BEVDet"), "Succeeded parsing .onnx file!");
-
-  for (size_t i = 0; i < min_shapes.size(); i++) {
-    nvinfer1::ITensor * it = network->getInput(i);
-    profile->setDimensions(it->getName(), nvinfer1::OptProfileSelector::kMIN, min_shapes[i]);
-    profile->setDimensions(it->getName(), nvinfer1::OptProfileSelector::kOPT, opt_shapes[i]);
-    profile->setDimensions(it->getName(), nvinfer1::OptProfileSelector::kMAX, max_shapes[i]);
-  }
-  config->addOptimizationProfile(profile);
-
-  nvinfer1::IHostMemory * engineString = builder->buildSerializedNetwork(*network, *config);
-  if (engineString == nullptr || engineString->size() == 0) {
-    RCLCPP_ERROR(rclcpp::get_logger("BEVDet"), "Failed building serialized engine!");
-    return;
-  }
-  RCLCPP_INFO(rclcpp::get_logger("BEVDet"), "Succeeded building serialized engine!");
-
-  nvinfer1::IRuntime * runtime{nvinfer1::createInferRuntime(g_logger_)};
-  engine = runtime->deserializeCudaEngine(engineString->data(), engineString->size());
-  if (engine == nullptr) {
-    RCLCPP_ERROR(rclcpp::get_logger("BEVDet"), "Failed building engine!");
-    return;
-  }
-  RCLCPP_INFO(rclcpp::get_logger("BEVDet"), "Succeeded building engine!");
-
-  std::ofstream engineFile(trtFile, std::ios::binary);
-  if (!engineFile) {
-    RCLCPP_ERROR(rclcpp::get_logger("BEVDet"), "Failed opening file to write");
-    return;
-  }
-  engineFile.write(static_cast<char *>(engineString->data()), engineString->size());
-  if (engineFile.fail()) {
-    RCLCPP_ERROR(rclcpp::get_logger("BEVDet"), "Failed saving .engine file!");
-    return;
-  }
-  RCLCPP_INFO(rclcpp::get_logger("BEVDet"), "Succeeded saving .engine file!");
-}
-
-BEVDet::~BEVDet()
-{
-  for (int i = 0; i < trt_engine_->getNbBindings(); i++) {
-    CHECK_CUDA(cudaFree(trt_buffer_dev_[i]));
-  }
-  delete[] trt_buffer_dev_;
-  delete[] post_buffer_;
-
-  trt_context_->destroy();
-  trt_engine_->destroy();
-}
-}  // namespace autoware::tensorrt_bevdet
diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
index 46bac289cd501..13e7e38a2df97 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
@@ -14,119 +14,23 @@
 
 // cspell:ignore BEVDET, thre, TRTBEV, bevdet, caminfo, intrin, Ncams, bevfeat, dlongterm, RGBHWC,
 // BGRCHW
-#include "autoware/tensorrt_bevdet/bevdet_node.hpp"
-
-#include "autoware/tensorrt_bevdet/preprocess.hpp"
-
-using Label = autoware_perception_msgs::msg::ObjectClassification;
-std::map<int, std::vector<int>> colormap{
-  {0, {0, 0, 255}},  // dodger blue
-  {1, {0, 201, 87}}, {2, {0, 201, 87}}, {3, {160, 32, 240}}, {4, {3, 168, 158}}, {5, {255, 0, 0}},
-  {6, {255, 97, 0}}, {7, {30, 0, 255}}, {8, {255, 0, 0}},    {9, {0, 0, 255}},   {10, {0, 0, 0}}};
-
-uint8_t getSemanticType(const std::string & class_name)
-{
-  if (class_name == "car") {
-    return Label::CAR;
-  } else if (class_name == "truck") {
-    return Label::TRUCK;
-  } else if (class_name == "bus") {
-    return Label::BUS;
-  } else if (class_name == "trailer") {
-    return Label::TRAILER;
-  } else if (class_name == "bicycle") {
-    return Label::BICYCLE;
-  } else if (class_name == "motorcycle") {
-    return Label::MOTORCYCLE;
-  } else if (class_name == "pedestrian") {
-    return Label::PEDESTRIAN;
-  } else {
-    return Label::UNKNOWN;
-  }
-}
 
-void box3DToDetectedObjects(
-  const std::vector<Box> & boxes, autoware_perception_msgs::msg::DetectedObjects & bevdet_objects,
-  const std::vector<std::string> & class_names, float score_thre, const bool has_twist = true)
-{
-  for (auto b : boxes) {
-    if (b.score < score_thre) continue;
-    autoware_perception_msgs::msg::DetectedObject obj;
-
-    Eigen::Vector3f center(b.x, b.y, b.z + b.h / 2.);
-
-    obj.existence_probability = b.score;
-    // classification
-    autoware_perception_msgs::msg::ObjectClassification classification;
-    classification.probability = 1.0f;
-    if (b.label >= 0 && static_cast<size_t>(b.label) < class_names.size()) {
-      classification.label = getSemanticType(class_names[b.label]);
-    } else {
-      classification.label = Label::UNKNOWN;
-    }
-    obj.classification.emplace_back(classification);
-
-    // pose and shape
-    geometry_msgs::msg::Point p;
-    p.x = center.x();
-    p.y = center.y();
-    p.z = center.z();
-    obj.kinematics.pose_with_covariance.pose.position = p;
-
-    tf2::Quaternion q;
-    q.setRPY(0, 0, b.r);
-    obj.kinematics.pose_with_covariance.pose.orientation = tf2::toMsg(q);
-
-    obj.shape.type = autoware_perception_msgs::msg::Shape::BOUNDING_BOX;
-
-    geometry_msgs::msg::Vector3 v;
-    v.x = b.l;
-    v.y = b.w;
-    v.z = b.h;
-    obj.shape.dimensions = v;
-    if (has_twist) {
-      float vel_x = b.vx;
-      float vel_y = b.vy;
-      geometry_msgs::msg::Twist twist;
-      twist.linear.x = std::sqrt(std::pow(vel_x, 2) + std::pow(vel_y, 2));
-      twist.angular.z = 2 * (std::atan2(vel_y, vel_x) - b.r);
-      obj.kinematics.twist_with_covariance.twist = twist;
-      obj.kinematics.has_twist = has_twist;
-    }
-
-    bevdet_objects.objects.emplace_back(obj);
-  }
-}
+#include "autoware/tensorrt_bevdet/bevdet_node.hpp"
+#include <preprocess.h>
 
-void getTransform(
-  const geometry_msgs::msg::TransformStamped & transform, Eigen::Quaternion<float> & rot,
-  Eigen::Translation3f & translation)
+namespace autoware
 {
-  rot = Eigen::Quaternion<float>(
-    transform.transform.rotation.w, transform.transform.rotation.x, transform.transform.rotation.y,
-    transform.transform.rotation.z);
-  translation = Eigen::Translation3f(
-    transform.transform.translation.x, transform.transform.translation.y,
-    transform.transform.translation.z);
-}
-
-void getCameraIntrinsics(
-  const sensor_msgs::msg::CameraInfo::SharedPtr msg, Eigen::Matrix3f & intrinsics)
+namespace tensorrt_bevdet
 {
-  intrinsics << msg->k[0], msg->k[1], msg->k[2], msg->k[3], msg->k[4], msg->k[5], msg->k[6],
-    msg->k[7], msg->k[8];
-}
-
-TRTBEVDetNode::TRTBEVDetNode(
-  const std::string & node_name, const rclcpp::NodeOptions & node_options)
-: rclcpp::Node(node_name, node_options)
+TRTBEVDetNode::TRTBEVDetNode(const rclcpp::NodeOptions & node_options)
+: rclcpp::Node("tensorrt_bevdet_node", node_options)
 {  // Only start camera info subscription and tf listener at the beginning
-  img_N_ = this->declare_parameter<int>("data_params.N", 6);  // camera num 6
+  img_N_ = this->declare_parameter<int>("data_params.CAM_NUM", 6);  // camera num 6
 
   caminfo_received_ = std::vector<bool>(img_N_, false);
-  cams_intrin = std::vector<Eigen::Matrix3f>(img_N_);
-  cams2ego_rot = std::vector<Eigen::Quaternion<float>>(img_N_);
-  cams2ego_trans = std::vector<Eigen::Translation3f>(img_N_);
+  cams_intrin_ = std::vector<Eigen::Matrix3f>(img_N_);
+  cams2ego_rot_ = std::vector<Eigen::Quaternion<float>>(img_N_);
+  cams2ego_trans_ = std::vector<Eigen::Translation3f>(img_N_);
 
   tf_buffer_ = std::make_unique<tf2_ros::Buffer>(this->get_clock());
   tf_listener_ = std::make_shared<tf2_ros::TransformListener>(*tf_buffer_);
@@ -148,16 +52,15 @@ void TRTBEVDetNode::initModel()
   engine_file_ = this->declare_parameter<std::string>("engine_path", "bevdet_one_lt_d.engine");
 
   imgs_name_ = this->declare_parameter<std::vector<std::string>>("data_params.cams");
-  class_names_ =
-    this->declare_parameter<std::vector<std::string>>("post_process_params.class_names");
+  class_names_ = this->declare_parameter<std::vector<std::string>>("post_process_params.class_names");
 
   RCLCPP_INFO_STREAM(this->get_logger(), "Successful load config!");
 
-  sampleData_.param = camParams(cams_intrin, cams2ego_rot, cams2ego_trans);
+  sampleData_.param = camParams(cams_intrin_, cams2ego_rot_, cams2ego_trans_);
 
   RCLCPP_INFO_STREAM(this->get_logger(), "Successful load image params!");
 
-  bevdet_ = std::make_shared<autoware::tensorrt_bevdet::BEVDet>(
+  bevdet_ = std::make_shared<BEVDet>(
     model_config_, img_N_, sampleData_.param.cams_intrin, sampleData_.param.cams2ego_rot,
     sampleData_.param.cams2ego_trans, onnx_file_, engine_file_);
 
@@ -195,9 +98,7 @@ void TRTBEVDetNode::startImageSubscription()
   using std::placeholders::_5;
   using std::placeholders::_6;
 
-  sub_f_img_.subscribe(
-    this, "~/input/topic_img_f",
-    rclcpp::QoS{1}.get_rmw_qos_profile());  // rmw_qos_profile_sensor_data
+  sub_f_img_.subscribe(this, "~/input/topic_img_f", rclcpp::QoS{1}.get_rmw_qos_profile());
   sub_b_img_.subscribe(this, "~/input/topic_img_b", rclcpp::QoS{1}.get_rmw_qos_profile());
 
   sub_fl_img_.subscribe(this, "~/input/topic_img_fl", rclcpp::QoS{1}.get_rmw_qos_profile());
@@ -218,30 +119,30 @@ void TRTBEVDetNode::startCameraInfoSubscription()
   // "CAM_BACK_RIGHT"]
   sub_fl_caminfo_ = this->create_subscription<sensor_msgs::msg::CameraInfo>(
     "~/input/topic_img_fl/camera_info", rclcpp::QoS{1},
-    [this](const sensor_msgs::msg::CameraInfo::SharedPtr msg) { camera_info_callback(0, msg); });
+    [this](const sensor_msgs::msg::CameraInfo::SharedPtr msg) { cameraInfoCallback(0, msg); });
 
   sub_f_caminfo_ = this->create_subscription<sensor_msgs::msg::CameraInfo>(
     "~/input/topic_img_f/camera_info", rclcpp::QoS{1},
-    [this](const sensor_msgs::msg::CameraInfo::SharedPtr msg) { camera_info_callback(1, msg); });
+    [this](const sensor_msgs::msg::CameraInfo::SharedPtr msg) { cameraInfoCallback(1, msg); });
 
   sub_fr_caminfo_ = this->create_subscription<sensor_msgs::msg::CameraInfo>(
     "~/input/topic_img_fr/camera_info", rclcpp::QoS{1},
-    [this](const sensor_msgs::msg::CameraInfo::SharedPtr msg) { camera_info_callback(2, msg); });
+    [this](const sensor_msgs::msg::CameraInfo::SharedPtr msg) { cameraInfoCallback(2, msg); });
 
   sub_bl_caminfo_ = this->create_subscription<sensor_msgs::msg::CameraInfo>(
     "~/input/topic_img_bl/camera_info", rclcpp::QoS{1},
-    [this](const sensor_msgs::msg::CameraInfo::SharedPtr msg) { camera_info_callback(3, msg); });
+    [this](const sensor_msgs::msg::CameraInfo::SharedPtr msg) { cameraInfoCallback(3, msg); });
 
   sub_b_caminfo_ = this->create_subscription<sensor_msgs::msg::CameraInfo>(
     "~/input/topic_img_b/camera_info", rclcpp::QoS{1},
-    [this](const sensor_msgs::msg::CameraInfo::SharedPtr msg) { camera_info_callback(4, msg); });
+    [this](const sensor_msgs::msg::CameraInfo::SharedPtr msg) { cameraInfoCallback(4, msg); });
 
   sub_br_caminfo_ = this->create_subscription<sensor_msgs::msg::CameraInfo>(
     "~/input/topic_img_br/camera_info", rclcpp::QoS{1},
-    [this](const sensor_msgs::msg::CameraInfo::SharedPtr msg) { camera_info_callback(5, msg); });
+    [this](const sensor_msgs::msg::CameraInfo::SharedPtr msg) { cameraInfoCallback(5, msg); });
 }
 
-void image_transport(std::vector<cv::Mat> imgs, uchar * out_imgs, size_t width, size_t height)
+void imageTransport(std::vector<cv::Mat> imgs, uchar * out_imgs, size_t width, size_t height)
 {
   uchar * temp = new uchar[width * height * 3];
   uchar * temp_gpu = nullptr;
@@ -280,7 +181,7 @@ void TRTBEVDetNode::callback(
   imgs.emplace_back(img_b);
   imgs.emplace_back(img_br);
 
-  image_transport(imgs, imgs_dev_, img_w_, img_h_);
+  imageTransport(imgs, imgs_dev_, img_w_, img_h_);
 
   // uchar *imgs_dev
   sampleData_.imgs_dev = imgs_dev_;
@@ -289,18 +190,18 @@ void TRTBEVDetNode::callback(
   ego_boxes.clear();
   float time = 0.f;
 
-  bevdet_->doInfer(sampleData_, ego_boxes, time);
+  bevdet_->DoInfer(sampleData_, ego_boxes, time);
 
   autoware_perception_msgs::msg::DetectedObjects bevdet_objects;
   bevdet_objects.header.frame_id = "base_link";
   bevdet_objects.header.stamp = msg_f_img->header.stamp;
 
-  box3DToDetectedObjects(ego_boxes, bevdet_objects, class_names_, score_thre_);
+  box3DToDetectedObjects(ego_boxes, bevdet_objects, class_names_, score_thre_, has_twist_);
 
   pub_boxes_->publish(bevdet_objects);
 }
 
-void TRTBEVDetNode::camera_info_callback(int idx, const sensor_msgs::msg::CameraInfo::SharedPtr msg)
+void TRTBEVDetNode::cameraInfoCallback(int idx, const sensor_msgs::msg::CameraInfo::SharedPtr msg)
 {
   if (caminfo_received_[idx])
     return;  // already received;  not expected to modify because of we init the model only once
@@ -312,15 +213,15 @@ void TRTBEVDetNode::camera_info_callback(int idx, const sensor_msgs::msg::Camera
   }
   Eigen::Matrix3f intrinsics;
   getCameraIntrinsics(msg, intrinsics);
-  cams_intrin[idx] = intrinsics;
+  cams_intrin_[idx] = intrinsics;
 
   Eigen::Quaternion<float> rot;
   Eigen::Translation3f translation;
   getTransform(
     tf_buffer_->lookupTransform("base_link", msg->header.frame_id, rclcpp::Time(0)), rot,
     translation);
-  cams2ego_rot[idx] = rot;
-  cams2ego_trans[idx] = translation;
+  cams2ego_rot_[idx] = rot;
+  cams2ego_trans_[idx] = translation;
 
   caminfo_received_[idx] = true;
   camera_info_received_flag_ =
@@ -331,12 +232,8 @@ TRTBEVDetNode::~TRTBEVDetNode()
 {
   delete imgs_dev_;
 }
-
-int main(int argc, char ** argv)
-{
-  rclcpp::init(argc, argv);
-  rclcpp::NodeOptions node_options;
-  auto bevdet_node = std::make_shared<TRTBEVDetNode>("tensorrt_bevdet_node", node_options);
-  rclcpp::spin(bevdet_node);
-  return 0;
-}
+}  // namespace tensorrt_bevdet
+}  // namespace autoware
+#include <rclcpp_components/register_node_macro.hpp>
+RCLCPP_COMPONENTS_REGISTER_NODE(
+  autoware::tensorrt_bevdet::TRTBEVDetNode)
diff --git a/perception/autoware_tensorrt_bevdet/src/bevpool_plugin.cu b/perception/autoware_tensorrt_bevdet/src/bevpool_plugin.cu
deleted file mode 100644
index a498a2a1685ba..0000000000000
--- a/perception/autoware_tensorrt_bevdet/src/bevpool_plugin.cu
+++ /dev/null
@@ -1,364 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "autoware/tensorrt_bevdet/bevpool_plugin.hpp"
-#include "autoware/tensorrt_bevdet/common.hpp"
-
-#include <cuda_fp16.h>
-#include <cuda_runtime_api.h>
-
-// input[0] depth            b*n x d x h x w
-// input[1] feat             b*n x c x h x w
-// input[2] ranks_depth      m
-// input[3] ranks_feat       m
-// input[4] ranks_bev        m
-// input[5] interval_starts  k
-// input[6] interval_lengths k
-
-// out[0]   bevfeat          b x c x h x w
-
-template <typename T1, typename T2>
-__global__ void bev_pool_v2_kernel(
-  int channel, int n_intervals, int map_size, int img_size, const T1 * __restrict__ depth,
-  const T1 * __restrict__ feat, const int * __restrict__ ranks_depth,
-  const int * __restrict__ ranks_feat, const int * __restrict__ ranks_bev,
-  const int * __restrict__ interval_starts, const int * __restrict__ interval_lengths,
-  T2 * __restrict__ out)
-{
-  CUDA_1D_KERNEL_LOOP(idx, n_intervals * channel)
-  {
-    int index = idx / channel;   // bev grid index
-    int curr_c = idx % channel;  // channel index
-    int interval_start = interval_starts[index];
-    int interval_length = interval_lengths[index];
-
-    int curr_step = curr_c * img_size;
-    int chan_step = channel * img_size;
-
-    T2 sum = 0;
-
-    int feat_offset = 0;
-    for (int i = 0; i < interval_length; i++) {
-      feat_offset = ranks_feat[interval_start + i] / img_size * chan_step + curr_step +
-                    ranks_feat[interval_start + i] % img_size;
-
-      sum += static_cast<T2>(feat[feat_offset]) *
-             static_cast<T2>(depth[ranks_depth[interval_start + i]]);
-    }
-    out[curr_c * map_size + ranks_bev[interval_start]] = sum;
-  }
-}
-
-namespace nvinfer1
-{
-// class BEVPoolPlugin
-BEVPoolPlugin::BEVPoolPlugin(const std::string & name, int bev_h, int bev_w, int n) : name_(name)
-{
-  m_.bev_h = bev_h;
-  m_.bev_w = bev_w;
-  m_.n = n;
-}
-
-BEVPoolPlugin::BEVPoolPlugin(const std::string & name, const void * buffer, size_t length)
-: name_(name)
-{
-  memcpy(&m_, buffer, sizeof(m_));
-}
-
-BEVPoolPlugin::~BEVPoolPlugin()
-{
-}
-
-IPluginV2DynamicExt * BEVPoolPlugin::clone() const noexcept
-{
-  auto p = new BEVPoolPlugin(name_, &m_, sizeof(m_));
-  p->setPluginNamespace(namespace_.c_str());
-  return p;
-}
-
-int32_t BEVPoolPlugin::getNbOutputs() const noexcept
-{
-  return 1;
-}
-
-DataType BEVPoolPlugin::getOutputDataType(
-  int32_t index, DataType const * inputTypes, int32_t nbInputs) const noexcept
-{
-  return DataType::kFLOAT;
-}
-
-DimsExprs BEVPoolPlugin::getOutputDimensions(
-  int32_t outputIndex, const DimsExprs * inputs, int32_t nbInputs,
-  IExprBuilder & exprBuilder) noexcept
-{
-  // input[0] depth            b*n x d x h x w
-  // input[1] feat             b*n x c x h x w
-
-  // input[2] ranks_depth      m
-  // input[3] ranks_feat       m
-  // input[4] ranks_bev        m
-  // input[5] interval_starts  k
-  // input[6] interval_lengths k
-
-  // out[0]   bevfeat          b x c x h x w
-
-  DimsExprs ret;
-  ret.nbDims = inputs[1].nbDims;
-  ret.d[0] = exprBuilder.operation(
-    DimensionOperation::kFLOOR_DIV, *inputs[1].d[0], *exprBuilder.constant(m_.n));
-  ret.d[1] = inputs[1].d[1];
-  ret.d[2] = exprBuilder.constant(m_.bev_h);
-  ret.d[3] = exprBuilder.constant(m_.bev_w);
-
-  return ret;
-}
-
-bool BEVPoolPlugin::supportsFormatCombination(
-  int32_t pos, const PluginTensorDesc * inOut, int32_t nbInputs, int32_t nbOutputs) noexcept
-{
-  // depth
-  if (pos == 0) {
-    return (inOut[pos].type == DataType::kFLOAT || inOut[pos].type == DataType::kHALF) &&
-           inOut[pos].format == TensorFormat::kLINEAR;
-  } else if (pos == 1) {  // feat
-    return inOut[0].type == inOut[1].type && inOut[pos].format == TensorFormat::kLINEAR;
-  } else if (pos == 7) {  // out
-    return (inOut[pos].type == DataType::kFLOAT || inOut[pos].type == DataType::kHALF) &&
-           inOut[pos].format == TensorFormat::kLINEAR;
-  } else {
-    return inOut[pos].type == DataType::kINT32 && inOut[pos].format == TensorFormat::kLINEAR;
-  }
-  return false;
-}
-
-void BEVPoolPlugin::configurePlugin(
-  const DynamicPluginTensorDesc * in, int32_t nbInputs, const DynamicPluginTensorDesc * out,
-  int32_t nbOutputs) noexcept
-{
-  return;
-}
-
-size_t BEVPoolPlugin::getWorkspaceSize(
-  const PluginTensorDesc * inputs, int32_t nbInputs, const PluginTensorDesc * outputs,
-  int32_t nbOutputs) const noexcept
-{
-  return 0;
-}
-
-int32_t BEVPoolPlugin::enqueue(
-  const PluginTensorDesc * inputDesc, const PluginTensorDesc * outputDesc,
-  const void * const * inputs, void * const * outputs, void * workspace,
-  cudaStream_t stream) noexcept
-{
-  // input[0] == depth            b*n x d x h x w
-  // input[1] == feat             b*n x c x h x w
-  // input[2] == ranks_depth      m
-  // input[3] == ranks_feat       m
-  // input[4] == ranks_bev        m
-  // input[5] == interval_starts  k
-  // input[6] == interval_lengths k
-
-  int channel = inputDesc[1].dims.d[1];
-  int n_intervals = inputDesc[5].dims.d[0];
-  int map_size = m_.bev_h * m_.bev_w;
-  int img_size = inputDesc[0].dims.d[2] * inputDesc[0].dims.d[3];
-
-  dim3 grid(GET_BLOCKS(n_intervals * channel));
-  dim3 block(NUM_THREADS);
-
-  switch (int(outputDesc[0].type)) {
-    case int(DataType::kFLOAT):
-      if (inputDesc[0].type == DataType::kFLOAT) {
-        // fp32 fp32
-        bev_pool_v2_kernel<float, float><<<grid, block, 0, stream>>>(
-          channel, n_intervals, map_size, img_size, reinterpret_cast<const float *>(inputs[0]),
-          reinterpret_cast<const float *>(inputs[1]), reinterpret_cast<const int *>(inputs[2]),
-          reinterpret_cast<const int *>(inputs[3]), reinterpret_cast<const int *>(inputs[4]),
-          reinterpret_cast<const int *>(inputs[5]), reinterpret_cast<const int *>(inputs[6]),
-          reinterpret_cast<float *>(outputs[0]));
-      } else {
-        // fp16 fp32
-        bev_pool_v2_kernel<__half, float><<<grid, block, 0, stream>>>(
-          channel, n_intervals, map_size, img_size, reinterpret_cast<const __half *>(inputs[0]),
-          reinterpret_cast<const __half *>(inputs[1]), reinterpret_cast<const int *>(inputs[2]),
-          reinterpret_cast<const int *>(inputs[3]), reinterpret_cast<const int *>(inputs[4]),
-          reinterpret_cast<const int *>(inputs[5]), reinterpret_cast<const int *>(inputs[6]),
-          reinterpret_cast<float *>(outputs[0]));
-      }
-      break;
-    case int(DataType::kHALF):
-      if (inputDesc[0].type == DataType::kFLOAT) {
-        // fp32 fp16
-        bev_pool_v2_kernel<float, __half><<<grid, block, 0, stream>>>(
-          channel, n_intervals, map_size, img_size, reinterpret_cast<const float *>(inputs[0]),
-          reinterpret_cast<const float *>(inputs[1]), reinterpret_cast<const int *>(inputs[2]),
-          reinterpret_cast<const int *>(inputs[3]), reinterpret_cast<const int *>(inputs[4]),
-          reinterpret_cast<const int *>(inputs[5]), reinterpret_cast<const int *>(inputs[6]),
-          reinterpret_cast<__half *>(outputs[0]));
-      } else {
-        // fp16 fp16
-        bev_pool_v2_kernel<__half, __half><<<grid, block, 0, stream>>>(
-          channel, n_intervals, map_size, img_size, reinterpret_cast<const __half *>(inputs[0]),
-          reinterpret_cast<const __half *>(inputs[1]), reinterpret_cast<const int *>(inputs[2]),
-          reinterpret_cast<const int *>(inputs[3]), reinterpret_cast<const int *>(inputs[4]),
-          reinterpret_cast<const int *>(inputs[5]), reinterpret_cast<const int *>(inputs[6]),
-          reinterpret_cast<__half *>(outputs[0]));
-      }
-      break;
-    default:  // should NOT be here
-      RCLCPP_ERROR(rclcpp::get_logger("BEVPoolPlugin"), "\tUnsupport datatype!");
-  }
-  return 0;
-}
-
-void BEVPoolPlugin::destroy() noexcept
-{
-  delete this;
-  return;
-}
-
-int32_t BEVPoolPlugin::initialize() noexcept
-{
-  return 0;
-}
-
-void BEVPoolPlugin::terminate() noexcept
-{
-  return;
-}
-
-size_t BEVPoolPlugin::getSerializationSize() const noexcept
-{
-  return sizeof(m_);
-}
-
-void BEVPoolPlugin::serialize(void * buffer) const noexcept
-{
-  memcpy(buffer, &m_, sizeof(m_));
-  return;
-}
-
-void BEVPoolPlugin::setPluginNamespace(const char * pluginNamespace) noexcept
-{
-  namespace_ = pluginNamespace;
-  return;
-}
-
-const char * BEVPoolPlugin::getPluginNamespace() const noexcept
-{
-  return namespace_.c_str();
-}
-
-const char * BEVPoolPlugin::getPluginType() const noexcept
-{
-  return BEVPOOL_PLUGIN_NAME;
-}
-
-const char * BEVPoolPlugin::getPluginVersion() const noexcept
-{
-  return BEVPOOL_PLUGIN_VERSION;
-}
-
-void BEVPoolPlugin::attachToContext(
-  cudnnContext * contextCudnn, cublasContext * contextCublas, IGpuAllocator * gpuAllocator) noexcept
-{
-  return;
-}
-
-void BEVPoolPlugin::detachFromContext() noexcept
-{
-  return;
-}
-
-// class BEVPoolPluginCreator
-PluginFieldCollection BEVPoolPluginCreator::fc_{};
-std::vector<PluginField> BEVPoolPluginCreator::attr_;
-
-BEVPoolPluginCreator::BEVPoolPluginCreator()
-{
-  attr_.clear();
-  attr_.emplace_back(PluginField("bev_h", nullptr, PluginFieldType::kINT32, 1));
-  attr_.emplace_back(PluginField("bev_w", nullptr, PluginFieldType::kINT32, 1));
-  attr_.emplace_back(PluginField("n", nullptr, PluginFieldType::kINT32, 1));
-
-  fc_.nbFields = attr_.size();
-  fc_.fields = attr_.data();
-}
-
-BEVPoolPluginCreator::~BEVPoolPluginCreator()
-{
-}
-
-IPluginV2DynamicExt * BEVPoolPluginCreator::createPlugin(
-  const char * name, const PluginFieldCollection * fc) noexcept
-{
-  const PluginField * fields = fc->fields;
-
-  int bev_h = -1;
-  int bev_w = -1;
-  int n = -1;
-
-  for (int i = 0; i < fc->nbFields; ++i) {
-    if (std::string(fc->fields[i].name) == std::string("bev_h")) {
-      bev_h = *reinterpret_cast<const int *>(fc->fields[i].data);
-    } else if (std::string(fc->fields[i].name) == std::string("bev_w")) {
-      bev_w = *reinterpret_cast<const int *>(fc->fields[i].data);
-    } else if (std::string(fc->fields[i].name) == std::string("n")) {
-      n = *reinterpret_cast<const int *>(fc->fields[i].data);
-    }
-  }
-  BEVPoolPlugin * pObj = new BEVPoolPlugin(name, bev_h, bev_w, n);
-  pObj->setPluginNamespace(namespace_.c_str());
-  return pObj;
-}
-
-IPluginV2DynamicExt * BEVPoolPluginCreator::deserializePlugin(
-  const char * name, const void * serialData, size_t serialLength) noexcept
-{
-  BEVPoolPlugin * pObj = new BEVPoolPlugin(name, serialData, serialLength);
-  pObj->setPluginNamespace(namespace_.c_str());
-  return pObj;
-}
-
-void BEVPoolPluginCreator::setPluginNamespace(const char * pluginNamespace) noexcept
-{
-  namespace_ = pluginNamespace;
-  return;
-}
-
-const char * BEVPoolPluginCreator::getPluginNamespace() const noexcept
-{
-  return namespace_.c_str();
-}
-
-const char * BEVPoolPluginCreator::getPluginName() const noexcept
-{
-  return BEVPOOL_PLUGIN_NAME;
-}
-
-const char * BEVPoolPluginCreator::getPluginVersion() const noexcept
-{
-  return BEVPOOL_PLUGIN_VERSION;
-}
-
-const PluginFieldCollection * BEVPoolPluginCreator::getFieldNames() noexcept
-{
-  return &fc_;
-}
-
-REGISTER_TENSORRT_PLUGIN(BEVPoolPluginCreator);
-
-}  // namespace nvinfer1
diff --git a/perception/autoware_tensorrt_bevdet/src/cpu_jpegdecoder.cpp b/perception/autoware_tensorrt_bevdet/src/cpu_jpegdecoder.cpp
deleted file mode 100644
index 1afe91becfb95..0000000000000
--- a/perception/autoware_tensorrt_bevdet/src/cpu_jpegdecoder.cpp
+++ /dev/null
@@ -1,93 +0,0 @@
-// Copyright 2024 AutoCore, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "autoware/tensorrt_bevdet/cpu_jpegdecoder.hpp"
-
-#include "autoware/tensorrt_bevdet/preprocess.hpp"
-
-#include <cuda_runtime.h>
-#include <jpeglib.h>
-#include <setjmp.h>
-#include <stddef.h>
-#include <stdio.h>
-
-#include <vector>
-
-struct jpeg_error_handler
-{
-  struct jpeg_error_mgr pub;
-  jmp_buf setjmp_buffer;
-};
-
-int decode_jpeg(const std::vector<char> & buffer, uchar * output)
-{
-  struct jpeg_decompress_struct cinfo;
-  struct jpeg_error_handler jerr;
-  cinfo.err = jpeg_std_error(&jerr.pub);
-
-  jerr.pub.error_exit = [](j_common_ptr cinfo) {
-    jpeg_error_handler * myerr = reinterpret_cast<jpeg_error_handler *>(cinfo->err);
-    longjmp(myerr->setjmp_buffer, 1);
-  };
-
-  if (setjmp(jerr.setjmp_buffer)) {
-    RCLCPP_ERROR(
-      rclcpp::get_logger("decode_jpeg"), "Failed to decompress jpeg: %s",
-      jerr.pub.jpeg_message_table[jerr.pub.msg_code]);
-    jpeg_destroy_decompress(&cinfo);
-    return EXIT_FAILURE;
-  }
-  jpeg_create_decompress(&cinfo);
-
-  if (buffer.size() == 0) {
-    RCLCPP_ERROR(rclcpp::get_logger("decode_jpeg"), "buffer size is 0");
-    return EXIT_FAILURE;
-  }
-  jpeg_mem_src(&cinfo, reinterpret_cast<const uchar *>(buffer.data()), buffer.size());
-  if (jpeg_read_header(&cinfo, TRUE) != JPEG_HEADER_OK) {
-    RCLCPP_ERROR(rclcpp::get_logger("decode_jpeg"), "Failed to read jpeg header");
-    return EXIT_FAILURE;
-  }
-  if (jpeg_start_decompress(&cinfo) != TRUE) {
-    RCLCPP_ERROR(rclcpp::get_logger("decode_jpeg"), "Failed to start decompress");
-    return EXIT_FAILURE;
-  }
-
-  while (cinfo.output_scanline < cinfo.output_height) {
-    auto row = cinfo.output_scanline;
-    uchar * ptr = output + row * cinfo.image_width * 3;
-    jpeg_read_scanlines(&cinfo, (JSAMPARRAY)&ptr, 1);
-  }
-  jpeg_finish_decompress(&cinfo);
-  jpeg_destroy_decompress(&cinfo);
-  return EXIT_SUCCESS;
-}
-
-int decode_cpu(
-  const std::vector<std::vector<char>> & files_data, uchar * out_imgs, size_t width, size_t height)
-{
-  uchar * temp = new uchar[width * height * 3];
-  uchar * temp_gpu = nullptr;
-  CHECK_CUDA(cudaMalloc(&temp_gpu, width * height * 3));
-  for (size_t i = 0; i < files_data.size(); i++) {
-    if (decode_jpeg(files_data[i], temp)) {
-      return EXIT_FAILURE;
-    }
-    CHECK_CUDA(cudaMemcpy(temp_gpu, temp, width * height * 3, cudaMemcpyHostToDevice));
-    convert_RGBHWC_to_BGRCHW(temp_gpu, out_imgs + i * width * height * 3, 3, height, width);
-    CHECK_CUDA(cudaDeviceSynchronize());
-  }
-  CHECK_CUDA(cudaFree(temp_gpu));
-  delete[] temp;
-  return EXIT_SUCCESS;
-}
diff --git a/perception/autoware_tensorrt_bevdet/src/data.cpp b/perception/autoware_tensorrt_bevdet/src/data.cpp
deleted file mode 100644
index 6dbb02015b5a2..0000000000000
--- a/perception/autoware_tensorrt_bevdet/src/data.cpp
+++ /dev/null
@@ -1,84 +0,0 @@
-// Copyright 2024 AutoCore, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "autoware/tensorrt_bevdet/data.hpp"
-
-#include "autoware/tensorrt_bevdet/cpu_jpegdecoder.hpp"
-
-#include <stdlib.h>
-
-#include <chrono>
-#include <cstdio>
-#include <fstream>
-#include <iostream>
-
-using std::chrono::duration;
-using std::chrono::high_resolution_clock;
-
-camParams::camParams(const YAML::Node & config, int n, std::vector<std::string> & cams_name)
-: N_img(n)
-{
-  if (static_cast<size_t>(n) != cams_name.size()) {
-    RCLCPP_ERROR(
-      rclcpp::get_logger("camParams"), "Error! Need %d camera param, but given %zu camera names!",
-      n, cams_name.size());
-  }
-  ego2global_rot = fromYamlQuater(config["ego2global_rotation"]);
-  ego2global_trans = fromYamlTrans(config["ego2global_translation"]);
-
-  lidar2ego_rot = fromYamlQuater(config["lidar2ego_rotation"]);
-  lidar2ego_trans = fromYamlTrans(config["lidar2ego_translation"]);
-
-  timestamp = config["timestamp"].as<std::int64_t>();
-  scene_token = config["scene_token"].as<std::string>();
-
-  imgs_file.clear();
-
-  cams_intrin.clear();
-  cams2ego_rot.clear();
-  cams2ego_trans.clear();
-
-  for (const std::string & name : cams_name) {
-    imgs_file.push_back("." + config["cams"][name]["data_path"].as<std::string>());
-
-    //
-    cams_intrin.push_back(fromYamlMatrix3f(config["cams"][name]["cam_intrinsic"]));
-    cams2ego_rot.push_back(fromYamlQuater(config["cams"][name]["sensor2ego_rotation"]));
-    cams2ego_trans.push_back(fromYamlTrans(config["cams"][name]["sensor2ego_translation"]));
-    //
-  }
-}
-
-Eigen::Translation3f fromYamlTrans(YAML::Node x)
-{
-  std::vector<float> trans = x.as<std::vector<float>>();
-  return Eigen::Translation3f(trans[0], trans[1], trans[2]);
-}
-
-Eigen::Quaternion<float> fromYamlQuater(YAML::Node x)
-{
-  std::vector<float> quater = x.as<std::vector<float>>();
-  return Eigen::Quaternion<float>(quater[0], quater[1], quater[2], quater[3]);
-}
-
-Eigen::Matrix3f fromYamlMatrix3f(YAML::Node x)
-{
-  std::vector<std::vector<float>> m = x.as<std::vector<std::vector<float>>>();
-  Eigen::Matrix3f mat;
-  for (size_t i = 0; i < m.size(); i++) {
-    for (size_t j = 0; j < m[0].size(); j++) {
-      mat(i, j) = m[i][j];
-    }
-  }
-  return mat;
-}
diff --git a/perception/autoware_tensorrt_bevdet/src/gatherbev_plugin.cu b/perception/autoware_tensorrt_bevdet/src/gatherbev_plugin.cu
deleted file mode 100644
index e9fecf8cef37a..0000000000000
--- a/perception/autoware_tensorrt_bevdet/src/gatherbev_plugin.cu
+++ /dev/null
@@ -1,308 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "autoware/tensorrt_bevdet/common.hpp"
-#include "autoware/tensorrt_bevdet/gatherbev_plugin.hpp"
-
-#include <cuda_fp16.h>
-#include <cuda_runtime_api.h>
-
-// input[0] == adj_feat   b x 8 x 80 x 128 x 128
-// input[1] == curr_feat  b x 80 x 128 x 128
-// input[2] == flag       b x 1
-// out[0]                 b x (8+1)*80 x 128 x 128
-template <typename T>
-__global__ void copy_feat_kernel(
-  int nthreads,  // b * (adj_num + 1) * map_size
-  int adj_num, int channel, int map_size, const T * adj_feats, const T * curr_feat,
-  const int * flag, T * out_feats)
-{
-  CUDA_1D_KERNEL_LOOP(idx, nthreads)
-  {
-    int b = idx / ((adj_num + 1) * map_size);
-    int n = (idx / map_size) % (adj_num + 1);
-    int m = idx % map_size;
-
-    int start = b * (adj_num + 1) * channel * map_size + n * channel * map_size + m;
-    int end = start + channel * map_size;
-    for (int i = start, c = 0; i < end; i += map_size, c++) {
-      if (flag[b] == 0 || n == 0) {
-        out_feats[i] = curr_feat[b * channel * map_size + c * map_size + m];
-      } else {
-        out_feats[i] = adj_feats[i - channel * map_size];
-      }
-    }
-  }
-}
-
-namespace nvinfer1
-{
-// class GatherBEVPlugin
-GatherBEVPlugin::GatherBEVPlugin(const std::string & name) : name_(name)
-{
-}
-
-GatherBEVPlugin::GatherBEVPlugin(const std::string & name, const void * buffer, size_t length)
-: name_(name)
-{
-  memcpy(&m_, buffer, sizeof(m_));
-}
-
-GatherBEVPlugin::~GatherBEVPlugin()
-{
-}
-
-IPluginV2DynamicExt * GatherBEVPlugin::clone() const noexcept
-{
-  auto p = new GatherBEVPlugin(name_, &m_, sizeof(m_));
-  p->setPluginNamespace(namespace_.c_str());
-  return p;
-}
-
-int32_t GatherBEVPlugin::getNbOutputs() const noexcept
-{
-  return 1;
-}
-
-DataType GatherBEVPlugin::getOutputDataType(
-  int32_t index, DataType const * inputTypes, int32_t nbInputs) const noexcept
-{
-  return inputTypes[0];  // 与adj_feat一致
-}
-
-DimsExprs GatherBEVPlugin::getOutputDimensions(
-  int32_t outputIndex, const DimsExprs * inputs, int32_t nbInputs,
-  IExprBuilder & exprBuilder) noexcept
-{
-  // input[0] == adj_feat   b x 8 x 80 x 128 x 128
-  // input[1] == curr_feat  b x 80 x 128 x 128
-  // input[2] == flag       b x 1
-  // out[0]                 b x (8+1)*80 x 128 x 128
-  DimsExprs ret;
-  ret.nbDims = inputs[0].nbDims - 1;
-
-  IDimensionExpr const * n_feat =
-    exprBuilder.operation(DimensionOperation::kSUM, *inputs[0].d[1], *exprBuilder.constant(1));
-  ret.d[0] = inputs[0].d[0];
-  ret.d[1] = exprBuilder.operation(DimensionOperation::kPROD, *n_feat, *inputs[0].d[2]);
-  ret.d[2] = inputs[0].d[3];
-  ret.d[3] = inputs[0].d[4];
-
-  return ret;
-}
-
-bool GatherBEVPlugin::supportsFormatCombination(
-  int32_t pos, const PluginTensorDesc * inOut, int32_t nbInputs, int32_t nbOutputs) noexcept
-{
-  // adj_feat
-  if (pos == 0) {
-    return (inOut[pos].type == DataType::kFLOAT || inOut[pos].type == DataType::kHALF) &&
-           inOut[pos].format == TensorFormat::kLINEAR;
-  } else if (pos == 1) {  // curr_feat
-    return inOut[0].type == inOut[1].type && inOut[pos].format == TensorFormat::kLINEAR;
-  } else if (pos == 3) {  // out
-    return inOut[0].type == inOut[3].type && inOut[pos].format == TensorFormat::kLINEAR;
-  } else if (pos == 2) {
-    return inOut[pos].type == DataType::kINT32 && inOut[pos].format == TensorFormat::kLINEAR;
-  }
-  return false;
-}
-
-void GatherBEVPlugin::configurePlugin(
-  const DynamicPluginTensorDesc * in, int32_t nbInputs, const DynamicPluginTensorDesc * out,
-  int32_t nbOutputs) noexcept
-{
-  return;
-}
-
-size_t GatherBEVPlugin::getWorkspaceSize(
-  const PluginTensorDesc * inputs, int32_t nbInputs, const PluginTensorDesc * outputs,
-  int32_t nbOutputs) const noexcept
-{
-  return 0;
-}
-
-int32_t GatherBEVPlugin::enqueue(
-  const PluginTensorDesc * inputDesc, const PluginTensorDesc * outputDesc,
-  const void * const * inputs, void * const * outputs, void * workspace,
-  cudaStream_t stream) noexcept
-{
-  // input[0] == adj_feat   b x 8 x 80 x 128 x 128
-  // input[1] == curr_feat  b x 80 x 128 x 128
-  // input[2] == flag       b x 1
-  // out[0]                 b x (8+1)*80 x 128 x 128
-
-  // int nthreads, // b * (adj_num + 1) * map_size
-  // int adj_num,
-  // int channel,
-  // int map_size,
-
-  // int flag = 0;
-  // CHECK_CUDA(cudaMemcpy(&flag, inputs[2], sizeof(int), cudaMemcpyDeviceToHost));
-
-  int b = inputDesc[0].dims.d[0];
-  int adj_num = inputDesc[0].dims.d[1];
-  int map_size = inputDesc[0].dims.d[3] * inputDesc[0].dims.d[4];
-  int channel = inputDesc[0].dims.d[2];
-
-  int feat_step = inputDesc[1].dims.d[1] * inputDesc[1].dims.d[2] * inputDesc[1].dims.d[3];
-
-  int nthreads = b * (adj_num + 1) * map_size;
-
-  dim3 grid(GET_BLOCKS(nthreads));
-  dim3 block(NUM_THREADS);
-
-  switch (int(outputDesc[0].type)) {
-    case int(DataType::kFLOAT):
-      // fp32
-      copy_feat_kernel<<<grid, block, 0, stream>>>(
-        nthreads, adj_num, channel, map_size, reinterpret_cast<const float *>(inputs[0]),
-        reinterpret_cast<const float *>(inputs[1]), reinterpret_cast<const int *>(inputs[2]),
-        reinterpret_cast<float *>(outputs[0]));
-
-      break;
-    case int(DataType::kHALF):
-      // fp16
-      copy_feat_kernel<<<grid, block, 0, stream>>>(
-        nthreads, adj_num, channel, map_size, reinterpret_cast<const __half *>(inputs[0]),
-        reinterpret_cast<const __half *>(inputs[1]), reinterpret_cast<const int *>(inputs[2]),
-        reinterpret_cast<__half *>(outputs[0]));
-      break;
-    default:  // should NOT be here
-      RCLCPP_ERROR(rclcpp::get_logger("GatherBEVPlugin"), "\tUnsupport datatype!");
-  }
-  return 0;
-}
-
-void GatherBEVPlugin::destroy() noexcept
-{
-  delete this;
-  return;
-}
-
-int32_t GatherBEVPlugin::initialize() noexcept
-{
-  return 0;
-}
-
-void GatherBEVPlugin::terminate() noexcept
-{
-  return;
-}
-
-size_t GatherBEVPlugin::getSerializationSize() const noexcept
-{
-  return sizeof(m_);
-}
-
-void GatherBEVPlugin::serialize(void * buffer) const noexcept
-{
-  memcpy(buffer, &m_, sizeof(m_));
-  return;
-}
-
-void GatherBEVPlugin::setPluginNamespace(const char * pluginNamespace) noexcept
-{
-  namespace_ = pluginNamespace;
-  return;
-}
-
-const char * GatherBEVPlugin::getPluginNamespace() const noexcept
-{
-  return namespace_.c_str();
-}
-
-const char * GatherBEVPlugin::getPluginType() const noexcept
-{
-  return GATHERBEV_PLUGIN_NAME;
-}
-
-const char * GatherBEVPlugin::getPluginVersion() const noexcept
-{
-  return GATHERBEV_PLUGIN_VERSION;
-}
-
-void GatherBEVPlugin::attachToContext(
-  cudnnContext * contextCudnn, cublasContext * contextCublas, IGpuAllocator * gpuAllocator) noexcept
-{
-  return;
-}
-
-void GatherBEVPlugin::detachFromContext() noexcept
-{
-  return;
-}
-
-// class GatherBEVPluginCreator
-PluginFieldCollection GatherBEVPluginCreator::fc_{};
-std::vector<PluginField> GatherBEVPluginCreator::attr_;
-
-GatherBEVPluginCreator::GatherBEVPluginCreator()
-{
-  attr_.clear();
-  fc_.nbFields = attr_.size();
-  fc_.fields = attr_.data();
-}
-
-GatherBEVPluginCreator::~GatherBEVPluginCreator()
-{
-}
-
-IPluginV2DynamicExt * GatherBEVPluginCreator::createPlugin(
-  const char * name, const PluginFieldCollection * fc) noexcept
-{
-  GatherBEVPlugin * pObj = new GatherBEVPlugin(name);
-  pObj->setPluginNamespace(namespace_.c_str());
-  return pObj;
-}
-
-IPluginV2DynamicExt * GatherBEVPluginCreator::deserializePlugin(
-  const char * name, const void * serialData, size_t serialLength) noexcept
-{
-  GatherBEVPlugin * pObj = new GatherBEVPlugin(name, serialData, serialLength);
-  pObj->setPluginNamespace(namespace_.c_str());
-  return pObj;
-}
-
-void GatherBEVPluginCreator::setPluginNamespace(const char * pluginNamespace) noexcept
-{
-  namespace_ = pluginNamespace;
-  return;
-}
-
-const char * GatherBEVPluginCreator::getPluginNamespace() const noexcept
-{
-  return namespace_.c_str();
-}
-
-const char * GatherBEVPluginCreator::getPluginName() const noexcept
-{
-  return GATHERBEV_PLUGIN_NAME;
-}
-
-const char * GatherBEVPluginCreator::getPluginVersion() const noexcept
-{
-  return GATHERBEV_PLUGIN_VERSION;
-}
-
-const PluginFieldCollection * GatherBEVPluginCreator::getFieldNames() noexcept
-{
-  return &fc_;
-}
-
-REGISTER_TENSORRT_PLUGIN(GatherBEVPluginCreator);
-
-}  // namespace nvinfer1
diff --git a/perception/autoware_tensorrt_bevdet/src/iou3d_nms.cu b/perception/autoware_tensorrt_bevdet/src/iou3d_nms.cu
deleted file mode 100644
index fe68a6f03bb0f..0000000000000
--- a/perception/autoware_tensorrt_bevdet/src/iou3d_nms.cu
+++ /dev/null
@@ -1,633 +0,0 @@
-// Copyright 2024 AutoCore, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-/*
-3D IoU Calculation and Rotated NMS(modified from 2D NMS written by others)
-Written by Shaoshuai Shi
-All Rights Reserved 2019-2022.
-*/
-
-#include "autoware/tensorrt_bevdet/iou3d_nms.hpp"
-
-struct Point
-{
-  float x, y;
-  __device__ Point() {}
-  __device__ Point(double _x, double _y) { x = _x, y = _y; }
-
-  __device__ void set(float _x, float _y)
-  {
-    x = _x;
-    y = _y;
-  }
-
-  __device__ Point operator+(const Point & b) const { return Point(x + b.x, y + b.y); }
-
-  __device__ Point operator-(const Point & b) const { return Point(x - b.x, y - b.y); }
-};
-
-__device__ inline float cross(const Point & a, const Point & b)
-{
-  return a.x * b.y - a.y * b.x;
-}
-
-__device__ inline float cross(const Point & p1, const Point & p2, const Point & p0)
-{
-  return (p1.x - p0.x) * (p2.y - p0.y) - (p2.x - p0.x) * (p1.y - p0.y);
-}
-
-__device__ int check_rect_cross(
-  const Point & p1, const Point & p2, const Point & q1, const Point & q2)
-{
-  int ret = min(p1.x, p2.x) <= max(q1.x, q2.x) && min(q1.x, q2.x) <= max(p1.x, p2.x) &&
-            min(p1.y, p2.y) <= max(q1.y, q2.y) && min(q1.y, q2.y) <= max(p1.y, p2.y);
-  return ret;
-}
-
-__device__ inline int check_in_box2d(const float * box, const Point & p)
-{
-  // params: (7) [x, y, z, dx, dy, dz, heading]
-  const float kMargin = 1e-2;
-
-  float center_x = box[0], center_y = box[1];
-  // rotate the point in the opposite direction of box
-  float angle_cos = cos(-box[6]);
-  float angle_sin = sin(-box[6]);
-  float rot_x = (p.x - center_x) * angle_cos + (p.y - center_y) * (-angle_sin);
-  float rot_y = (p.x - center_x) * angle_sin + (p.y - center_y) * angle_cos;
-
-  return (fabs(rot_x) < box[3] / 2 + kMargin && fabs(rot_y) < box[4] / 2 + kMargin);
-}
-
-__device__ inline int intersection(
-  const Point & p1, const Point & p0, const Point & q1, const Point & q0, Point & ans)
-{
-  // fast exclusion
-  if (check_rect_cross(p0, p1, q0, q1) == 0) return 0;
-
-  // check cross standing
-  float s1 = cross(q0, p1, p0);
-  float s2 = cross(p1, q1, p0);
-  float s3 = cross(p0, q1, q0);
-  float s4 = cross(q1, p1, q0);
-
-  if (!(s1 * s2 > 0 && s3 * s4 > 0)) return 0;
-
-  // calculate intersection of two lines
-  float s5 = cross(q1, p1, p0);
-  if (fabs(s5 - s1) > EPS) {
-    ans.x = (s5 * q0.x - s1 * q1.x) / (s5 - s1);
-    ans.y = (s5 * q0.y - s1 * q1.y) / (s5 - s1);
-
-  } else {
-    float a0 = p0.y - p1.y, b0 = p1.x - p0.x, c0 = p0.x * p1.y - p1.x * p0.y;
-    float a1 = q0.y - q1.y, b1 = q1.x - q0.x, c1 = q0.x * q1.y - q1.x * q0.y;
-    float D = a0 * b1 - a1 * b0;
-
-    ans.x = (b0 * c1 - b1 * c0) / D;
-    ans.y = (a1 * c0 - a0 * c1) / D;
-  }
-
-  return 1;
-}
-
-__device__ inline void rotate_around_center(
-  const Point & center, const float angle_cos, const float angle_sin, Point & p)
-{
-  float new_x = (p.x - center.x) * angle_cos + (p.y - center.y) * (-angle_sin) + center.x;
-  float new_y = (p.x - center.x) * angle_sin + (p.y - center.y) * angle_cos + center.y;
-  p.set(new_x, new_y);
-}
-
-__device__ inline int point_cmp(const Point & a, const Point & b, const Point & center)
-{
-  return atan2(a.y - center.y, a.x - center.x) > atan2(b.y - center.y, b.x - center.x);
-}
-
-__device__ inline float box_overlap(const float * box_a, const float * box_b)
-{
-  // params box_a: [x, y, z, dx, dy, dz, heading]
-  // params box_b: [x, y, z, dx, dy, dz, heading]
-
-  float a_angle = box_a[6], b_angle = box_b[6];
-  float a_dx_half = box_a[3] / 2, b_dx_half = box_b[3] / 2, a_dy_half = box_a[4] / 2,
-        b_dy_half = box_b[4] / 2;
-  float a_x1 = box_a[0] - a_dx_half, a_y1 = box_a[1] - a_dy_half;
-  float a_x2 = box_a[0] + a_dx_half, a_y2 = box_a[1] + a_dy_half;
-  float b_x1 = box_b[0] - b_dx_half, b_y1 = box_b[1] - b_dy_half;
-  float b_x2 = box_b[0] + b_dx_half, b_y2 = box_b[1] + b_dy_half;
-
-  Point center_a(box_a[0], box_a[1]);
-  Point center_b(box_b[0], box_b[1]);
-
-#ifdef DEBUG
-  RCLCPP_DEBUG(
-    rclcpp::get_logger("iou3d_nms"),
-    "a: (%.3f, %.3f, %.3f, %.3f, %.3f), b: (%.3f, %.3f, %.3f, %.3f, %.3f)", a_x1, a_y1, a_x2, a_y2,
-    a_angle, b_x1, b_y1, b_x2, b_y2, b_angle);
-  RCLCPP_DEBUG(
-    rclcpp::get_logger("iou3d_nms"), "center a: (%.3f, %.3f), b: (%.3f, %.3f)", center_a.x,
-    center_a.y, center_b.x, center_b.y);
-#endif
-
-  Point box_a_corners[5];
-  box_a_corners[0].set(a_x1, a_y1);
-  box_a_corners[1].set(a_x2, a_y1);
-  box_a_corners[2].set(a_x2, a_y2);
-  box_a_corners[3].set(a_x1, a_y2);
-
-  Point box_b_corners[5];
-  box_b_corners[0].set(b_x1, b_y1);
-  box_b_corners[1].set(b_x2, b_y1);
-  box_b_corners[2].set(b_x2, b_y2);
-  box_b_corners[3].set(b_x1, b_y2);
-
-  // get oriented corners
-  float a_angle_cos = cos(a_angle), a_angle_sin = sin(a_angle);
-  float b_angle_cos = cos(b_angle), b_angle_sin = sin(b_angle);
-
-  for (int k = 0; k < 4; k++) {
-#ifdef DEBUG
-    RCLCPP_DEBUG(
-      rclcpp::get_logger("iou3d_nms"), "before corner %d: a(%.3f, %.3f), b(%.3f, %.3f)", k,
-      box_a_corners[k].x, box_a_corners[k].y, box_b_corners[k].x, box_b_corners[k].y);
-#endif
-    rotate_around_center(center_a, a_angle_cos, a_angle_sin, box_a_corners[k]);
-    rotate_around_center(center_b, b_angle_cos, b_angle_sin, box_b_corners[k]);
-#ifdef DEBUG
-    RCLCPP_DEBUG(
-      rclcpp::get_logger("iou3d_nms"), "corner %d: a(%.3f, %.3f), b(%.3f, %.3f)", k,
-      box_a_corners[k].x, box_a_corners[k].y, box_b_corners[k].x, box_b_corners[k].y);
-#endif
-  }
-
-  box_a_corners[4] = box_a_corners[0];
-  box_b_corners[4] = box_b_corners[0];
-
-  // get intersection of lines
-  Point cross_points[16];
-  Point poly_center;
-  int cnt = 0, flag = 0;
-
-  poly_center.set(0, 0);
-  for (int i = 0; i < 4; i++) {
-    for (int j = 0; j < 4; j++) {
-      flag = intersection(
-        box_a_corners[i + 1], box_a_corners[i], box_b_corners[j + 1], box_b_corners[j],
-        cross_points[cnt]);
-      if (flag) {
-        poly_center = poly_center + cross_points[cnt];
-        cnt++;
-#ifdef DEBUG
-        RCLCPP_DEBUG(
-          rclcpp::get_logger("iou3d_nms"),
-          "Cross points (%.3f, %.3f): a(%.3f, %.3f)->(%.3f, %.3f), b(%.3f, %.3f)->(%.3f, %.3f)",
-          cross_points[cnt - 1].x, cross_points[cnt - 1].y, box_a_corners[i].x, box_a_corners[i].y,
-          box_a_corners[i + 1].x, box_a_corners[i + 1].y, box_b_corners[i].x, box_b_corners[i].y,
-          box_b_corners[i + 1].x, box_b_corners[i + 1].y);
-#endif
-      }
-    }
-  }
-
-  // check corners
-  for (int k = 0; k < 4; k++) {
-    if (check_in_box2d(box_a, box_b_corners[k])) {
-      poly_center = poly_center + box_b_corners[k];
-      cross_points[cnt] = box_b_corners[k];
-      cnt++;
-#ifdef DEBUG
-      RCLCPP_DEBUG(
-        rclcpp::get_logger("iou3d_nms"), "b corners in a: corner_b(%.3f, %.3f)",
-        cross_points[cnt - 1].x, cross_points[cnt - 1].y);
-#endif
-    }
-    if (check_in_box2d(box_b, box_a_corners[k])) {
-      poly_center = poly_center + box_a_corners[k];
-      cross_points[cnt] = box_a_corners[k];
-      cnt++;
-#ifdef DEBUG
-      RCLCPP_DEBUG(
-        rclcpp::get_logger("iou3d_nms"), "a corners in b: corner_a(%.3f, %.3f)",
-        cross_points[cnt - 1].x, cross_points[cnt - 1].y);
-#endif
-    }
-  }
-
-  poly_center.x /= cnt;
-  poly_center.y /= cnt;
-
-  // sort the points of polygon
-  Point temp;
-  for (int j = 0; j < cnt - 1; j++) {
-    for (int i = 0; i < cnt - j - 1; i++) {
-      if (point_cmp(cross_points[i], cross_points[i + 1], poly_center)) {
-        temp = cross_points[i];
-        cross_points[i] = cross_points[i + 1];
-        cross_points[i + 1] = temp;
-      }
-    }
-  }
-
-#ifdef DEBUG
-  RCLCPP_DEBUG(rclcpp::get_logger("iou3d_nms"), "cnt=%d", cnt);
-  for (int i = 0; i < cnt; i++) {
-    RCLCPP_DEBUG(
-      rclcpp::get_logger("iou3d_nms"), "All cross point %d: (%.3f, %.3f)", i, cross_points[i].x,
-      cross_points[i].y);
-  }
-#endif
-
-  // get the overlap areas
-  float area = 0;
-  for (int k = 0; k < cnt - 1; k++) {
-    area += cross(cross_points[k] - cross_points[0], cross_points[k + 1] - cross_points[0]);
-  }
-
-  return fabs(area) / 2.0;
-}
-
-__device__ inline float iou_bev(const float * box_a, const float * box_b)
-{
-  // params box_a: [x, y, z, dx, dy, dz, heading]
-  // params box_b: [x, y, z, dx, dy, dz, heading]
-  float sa = box_a[3] * box_a[4];
-  float sb = box_b[3] * box_b[4];
-  float s_overlap = box_overlap(box_a, box_b);
-  return s_overlap / fmaxf(sa + sb - s_overlap, EPS);
-}
-
-__device__ inline float iou_normal(float const * const a, float const * const b)
-{
-  // params: a: [x, y, z, dx, dy, dz, heading]
-  // params: b: [x, y, z, dx, dy, dz, heading]
-  float left = fmaxf(a[0] - a[3] / 2, b[0] - b[3] / 2),
-        right = fminf(a[0] + a[3] / 2, b[0] + b[3] / 2);
-  float top = fmaxf(a[1] - a[4] / 2, b[1] - b[4] / 2),
-        bottom = fminf(a[1] + a[4] / 2, b[1] + b[4] / 2);
-  float width = fmaxf(right - left, 0.f), height = fmaxf(bottom - top, 0.f);
-  float interS = width * height;
-  float Sa = a[3] * a[4];
-  float Sb = b[3] * b[4];
-  return interS / fmaxf(Sa + Sb - interS, EPS);
-}
-
-__global__ void boxes_overlap_kernel(
-  const int num_a, const float * boxes_a, const int num_b, const float * boxes_b,
-  float * ans_overlap)
-{
-  // params boxes_a: (N, 7) [x, y, z, dx, dy, dz, heading]
-  // params boxes_b: (M, 7) [x, y, z, dx, dy, dz, heading]
-  const int a_idx = blockIdx.y * THREADS_PER_BLOCK + threadIdx.y;
-  const int b_idx = blockIdx.x * THREADS_PER_BLOCK + threadIdx.x;
-
-  if (a_idx >= num_a || b_idx >= num_b) {
-    return;
-  }
-  const float * cur_box_a = boxes_a + a_idx * kBoxBlockSize;
-  const float * cur_box_b = boxes_b + b_idx * kBoxBlockSize;
-  float s_overlap = box_overlap(cur_box_a, cur_box_b);
-  ans_overlap[a_idx * num_b + b_idx] = s_overlap;
-}
-
-__global__ void boxes_iou_bev_kernel(
-  const int num_a, const float * boxes_a, const int num_b, const float * boxes_b, float * ans_iou)
-{
-  // params boxes_a: (N, 7) [x, y, z, dx, dy, dz, heading]
-  // params boxes_b: (M, 7) [x, y, z, dx, dy, dz, heading]
-  const int a_idx = blockIdx.y * THREADS_PER_BLOCK + threadIdx.y;
-  const int b_idx = blockIdx.x * THREADS_PER_BLOCK + threadIdx.x;
-
-  if (a_idx >= num_a || b_idx >= num_b) {
-    return;
-  }
-
-  const float * cur_box_a = boxes_a + a_idx * kBoxBlockSize;
-  const float * cur_box_b = boxes_b + b_idx * kBoxBlockSize;
-  float cur_iou_bev = iou_bev(cur_box_a, cur_box_b);
-  ans_iou[a_idx * num_b + b_idx] = cur_iou_bev;
-}
-
-__global__ void RotatedNmsKernel(
-  const int boxes_num, const float nms_overlap_thresh, const float * boxes, std::int64_t * mask)
-{
-  // params: boxes (N, 7) [x, y, z, dx, dy, dz, heading]
-  // params: mask (N, N/THREADS_PER_BLOCK_NMS)
-
-  const int row_start = blockIdx.y;
-  const int col_start = blockIdx.x;
-
-  // if (row_start > col_start) return;
-
-  const int row_size = fminf(boxes_num - row_start * THREADS_PER_BLOCK_NMS, THREADS_PER_BLOCK_NMS);
-  const int col_size = fminf(boxes_num - col_start * THREADS_PER_BLOCK_NMS, THREADS_PER_BLOCK_NMS);
-
-  __shared__ float block_boxes[THREADS_PER_BLOCK_NMS * kBoxBlockSize];
-
-  if (threadIdx.x < col_size) {
-    block_boxes[threadIdx.x * kBoxBlockSize + 0] =
-      boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 0];
-    block_boxes[threadIdx.x * kBoxBlockSize + 1] =
-      boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 1];
-    block_boxes[threadIdx.x * kBoxBlockSize + 2] =
-      boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 2];
-    block_boxes[threadIdx.x * kBoxBlockSize + 3] =
-      boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 3];
-    block_boxes[threadIdx.x * kBoxBlockSize + 4] =
-      boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 4];
-    block_boxes[threadIdx.x * kBoxBlockSize + 5] =
-      boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 5];
-    block_boxes[threadIdx.x * kBoxBlockSize + 6] =
-      boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 6];
-  }
-  __syncthreads();
-
-  if (threadIdx.x < row_size) {
-    const int cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x;
-    const float * cur_box = boxes + cur_box_idx * kBoxBlockSize;
-
-    int i = 0;
-    std::int64_t t = 0;
-    int start = 0;
-    if (row_start == col_start) {
-      start = threadIdx.x + 1;
-    }
-    for (i = start; i < col_size; i++) {
-      if (iou_bev(cur_box, block_boxes + i * kBoxBlockSize) > nms_overlap_thresh) {
-        t |= 1ULL << i;
-      }
-    }
-    const int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS);
-    mask[cur_box_idx * col_blocks + col_start] = t;
-  }
-}
-
-__global__ void NmsKernel(
-  const int boxes_num, const float nms_overlap_thresh, const float * boxes, std::int64_t * mask)
-{
-  // params: boxes (N, 7) [x, y, z, dx, dy, dz, heading]
-  // params: mask (N, N/THREADS_PER_BLOCK_NMS)
-
-  const int row_start = blockIdx.y;
-  const int col_start = blockIdx.x;
-
-  // if (row_start > col_start) return;
-
-  const int row_size = fminf(boxes_num - row_start * THREADS_PER_BLOCK_NMS, THREADS_PER_BLOCK_NMS);
-  const int col_size = fminf(boxes_num - col_start * THREADS_PER_BLOCK_NMS, THREADS_PER_BLOCK_NMS);
-
-  __shared__ float block_boxes[THREADS_PER_BLOCK_NMS * kBoxBlockSize];
-
-  if (threadIdx.x < col_size) {
-    block_boxes[threadIdx.x * kBoxBlockSize + 0] =
-      boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 0];
-    block_boxes[threadIdx.x * kBoxBlockSize + 1] =
-      boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 1];
-    block_boxes[threadIdx.x * kBoxBlockSize + 2] =
-      boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 2];
-    block_boxes[threadIdx.x * kBoxBlockSize + 3] =
-      boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 3];
-    block_boxes[threadIdx.x * kBoxBlockSize + 4] =
-      boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 4];
-    block_boxes[threadIdx.x * kBoxBlockSize + 5] =
-      boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 5];
-    block_boxes[threadIdx.x * kBoxBlockSize + 6] =
-      boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * kBoxBlockSize + 6];
-  }
-  __syncthreads();
-
-  if (threadIdx.x < row_size) {
-    const int cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x;
-    const float * cur_box = boxes + cur_box_idx * kBoxBlockSize;
-
-    int i = 0;
-    std::int64_t t = 0;
-    int start = 0;
-    if (row_start == col_start) {
-      start = threadIdx.x + 1;
-    }
-    for (i = start; i < col_size; i++) {
-      if (iou_normal(cur_box, block_boxes + i * kBoxBlockSize) > nms_overlap_thresh) {
-        t |= 1ULL << i;
-      }
-    }
-    const int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS);
-    mask[cur_box_idx * col_blocks + col_start] = t;
-  }
-}
-
-__global__ void RotatedNmsWithIndicesKernel(
-  const int boxes_num, const float nms_overlap_thresh, const float * res_box,
-  const int * res_sorted_indices, std::int64_t * mask)
-{
-  const int row_start = blockIdx.y;
-  const int col_start = blockIdx.x;
-
-  const int row_size = fminf(boxes_num - row_start * THREADS_PER_BLOCK_NMS, THREADS_PER_BLOCK_NMS);
-  const int col_size = fminf(boxes_num - col_start * THREADS_PER_BLOCK_NMS, THREADS_PER_BLOCK_NMS);
-
-  __shared__ float block_boxes[THREADS_PER_BLOCK_NMS * kBoxBlockSize];
-
-  if (threadIdx.x < col_size) {  // 419, 420 have two situations to discuss separately
-    const int col_actual_idx = res_sorted_indices[THREADS_PER_BLOCK_NMS * col_start + threadIdx.x];
-    block_boxes[threadIdx.x * kBoxBlockSize + 0] = res_box[col_actual_idx * kBoxBlockSize + 0];
-    block_boxes[threadIdx.x * kBoxBlockSize + 1] = res_box[col_actual_idx * kBoxBlockSize + 1];
-    block_boxes[threadIdx.x * kBoxBlockSize + 2] = res_box[col_actual_idx * kBoxBlockSize + 2];
-    block_boxes[threadIdx.x * kBoxBlockSize + 3] = res_box[col_actual_idx * kBoxBlockSize + 3];
-    block_boxes[threadIdx.x * kBoxBlockSize + 4] = res_box[col_actual_idx * kBoxBlockSize + 4];
-    block_boxes[threadIdx.x * kBoxBlockSize + 5] = res_box[col_actual_idx * kBoxBlockSize + 5];
-    block_boxes[threadIdx.x * kBoxBlockSize + 6] = res_box[col_actual_idx * kBoxBlockSize + 6];
-  }
-  __syncthreads();  // Waiting for threads within the block
-
-  if (threadIdx.x < row_size) {  //
-    const int cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x;
-    const int row_actual_idx = res_sorted_indices[cur_box_idx];
-
-    float cur_box[kBoxBlockSize];
-    cur_box[0] = res_box[row_actual_idx * kBoxBlockSize + 0];
-    cur_box[1] = res_box[row_actual_idx * kBoxBlockSize + 1];
-    cur_box[2] = res_box[row_actual_idx * kBoxBlockSize + 2];
-    cur_box[3] = res_box[row_actual_idx * kBoxBlockSize + 3];
-    cur_box[4] = res_box[row_actual_idx * kBoxBlockSize + 4];
-    cur_box[5] = res_box[row_actual_idx * kBoxBlockSize + 5];
-    cur_box[6] = res_box[row_actual_idx * kBoxBlockSize + 6];
-
-    int i = 0;
-    std::int64_t t = 0;
-    int start = 0;
-    if (row_start == col_start) {
-      start = threadIdx.x + 1;  // Isn't it right now to compare with [0~threadIdx. x)? FIXME
-    }  // I think this is correct, and there is no need to compare it with numbers smaller than my
-       // own. If col is smaller than row, there is also no need to compare it
-    for (i = start; i < col_size; i++) {
-      if (iou_bev(cur_box, block_boxes + i * kBoxBlockSize) > nms_overlap_thresh) {
-        t |= 1ULL << i;
-      }
-    }
-
-    const int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS);
-    // assume cur_box_idx = 21, col_start = 0, row_start = 0 , threadIdx = 21,
-    // mark 21 th box and top 64 boxes
-    mask[cur_box_idx * col_blocks + col_start] = t;
-    // or col_start * boxes_num + cur_box_idx
-  }
-}
-
-__global__ void box_assign_kernel(
-  float * reg, float * height, float * dim, float * rot, float * boxes, float * score, int * label,
-  float * out_score, int * out_label, int * validIndexs, int head_x_size, int head_y_size)
-{
-  int boxId = blockIdx.x;
-  int channel = threadIdx.x;
-  int idx = validIndexs[boxId];
-  if (channel == 0) {
-    boxes[boxId * kBoxBlockSize + 0] = reg[idx];
-  } else if (channel == 1) {
-    boxes[boxId * kBoxBlockSize + 1] = reg[idx + head_x_size * head_y_size];
-  } else if (channel == 2) {
-    boxes[boxId * kBoxBlockSize + 2] = height[idx];
-  } else if (channel == 3) {
-    boxes[boxId * kBoxBlockSize + 3] = dim[idx];
-  } else if (channel == 4) {
-    boxes[boxId * kBoxBlockSize + 4] = dim[idx + head_x_size * head_y_size];
-  } else if (channel == 5) {
-    boxes[boxId * kBoxBlockSize + 5] = dim[idx + 2 * head_x_size * head_y_size];
-  } else if (channel == 6) {
-    float theta =
-      atan2f(rot[0 * head_x_size * head_y_size + idx], rot[1 * head_x_size * head_y_size + idx]);
-    theta = -theta - 3.1415926 / 2;
-    boxes[boxId * kBoxBlockSize + 6] = theta;
-  }
-  // else if(channel == 7)
-  // out_score[boxId] = score[idx];
-  else if (channel == 8) {
-    out_label[boxId] = label[idx];
-  }
-}
-
-void box_assign_launcher(
-  float * reg, float * height, float * dim, float * rot, float * boxes, float * score, int * label,
-  float * out_score, int * out_label, int * validIndexs, int boxSize, int head_x_size,
-  int head_y_size)
-{
-  box_assign_kernel<<<boxSize, 9>>>(
-    reg, height, dim, rot, boxes, score, label, out_score, out_label, validIndexs, head_x_size,
-    head_y_size);
-}
-
-__global__ void index_assign(int * indexs)
-{
-  int yIdx = blockIdx.x;
-  int xIdx = threadIdx.x;
-  int idx = yIdx * blockDim.x + xIdx;
-  indexs[idx] = idx;
-}
-
-void index_assign_launcher(int * indexs, int head_x_size, int head_y_size)
-{
-  index_assign<<<head_x_size, head_y_size>>>(indexs);
-}
-
-void boxes_overlap_launcher(
-  const int num_a, const float * boxes_a, const int num_b, const float * boxes_b,
-  float * ans_overlap)
-{
-  dim3 blocks(
-    DIVUP(num_b, THREADS_PER_BLOCK),
-    DIVUP(num_a, THREADS_PER_BLOCK));  // blockIdx.x(col), blockIdx.y(row)
-  dim3 threads(THREADS_PER_BLOCK, THREADS_PER_BLOCK);
-
-  boxes_overlap_kernel<<<blocks, threads>>>(num_a, boxes_a, num_b, boxes_b, ans_overlap);
-#ifdef DEBUG
-  cudaDeviceSynchronize();  // for using printf in kernel function
-#endif
-}
-
-void boxes_iou_bev_launcher(
-  const int num_a, const float * boxes_a, const int num_b, const float * boxes_b, float * ans_iou)
-{
-  dim3 blocks(
-    DIVUP(num_b, THREADS_PER_BLOCK),
-    DIVUP(num_a, THREADS_PER_BLOCK));  // blockIdx.x(col), blockIdx.y(row)
-  dim3 threads(THREADS_PER_BLOCK, THREADS_PER_BLOCK);
-
-  boxes_iou_bev_kernel<<<blocks, threads>>>(num_a, boxes_a, num_b, boxes_b, ans_iou);
-#ifdef DEBUG
-  cudaDeviceSynchronize();  // for using printf in kernel function
-#endif
-}
-
-void nms_launcher(const float * boxes, std::int64_t * mask, int boxes_num, float nms_overlap_thresh)
-{
-  dim3 blocks(DIVUP(boxes_num, THREADS_PER_BLOCK_NMS), DIVUP(boxes_num, THREADS_PER_BLOCK_NMS));
-  dim3 threads(THREADS_PER_BLOCK_NMS);
-
-  RotatedNmsKernel<<<blocks, threads>>>(boxes_num, nms_overlap_thresh, boxes, mask);
-}
-
-void nms_normal_launcher(
-  const float * boxes, std::int64_t * mask, int boxes_num, float nms_overlap_thresh)
-{
-  dim3 blocks(DIVUP(boxes_num, THREADS_PER_BLOCK_NMS), DIVUP(boxes_num, THREADS_PER_BLOCK_NMS));
-  dim3 threads(THREADS_PER_BLOCK_NMS);
-  NmsKernel<<<blocks, threads>>>(boxes_num, nms_overlap_thresh, boxes, mask);
-}
-
-Iou3dNmsCuda::Iou3dNmsCuda(
-  const int head_x_size, const int head_y_size, const float nms_overlap_thresh)
-: head_x_size_(head_x_size), head_y_size_(head_y_size), nms_overlap_thresh_(nms_overlap_thresh)
-{
-}
-
-int Iou3dNmsCuda::DoIou3dNms(
-  const int box_num_pre, const float * res_box, const int * res_sorted_indices,
-  std::int64_t * host_keep_data)
-{
-  const int col_blocks = DIVUP(box_num_pre, THREADS_PER_BLOCK_NMS);  // How many blocks are divided
-  std::int64_t * dev_mask = NULL;
-  cudaMalloc((void **)&dev_mask, box_num_pre * col_blocks * sizeof(std::int64_t));  //
-  // THREADS_PER_BLOCK_NMS 掩码的长度
-  dim3 blocks(DIVUP(box_num_pre, THREADS_PER_BLOCK_NMS), DIVUP(box_num_pre, THREADS_PER_BLOCK_NMS));
-  dim3 threads(THREADS_PER_BLOCK_NMS);
-  RotatedNmsWithIndicesKernel<<<blocks, threads>>>(
-    box_num_pre, nms_overlap_thresh_, res_box, res_sorted_indices, dev_mask);
-
-  std::int64_t host_mask[box_num_pre * col_blocks];
-  cudaMemcpy(
-    host_mask, dev_mask, box_num_pre * col_blocks * sizeof(std::int64_t), cudaMemcpyDeviceToHost);
-  cudaFree(dev_mask);
-
-  std::int64_t host_remv[col_blocks];
-  memset(host_remv, 0, col_blocks * sizeof(std::int64_t));
-  int num_to_keep = 0;
-  for (int i = 0; i < box_num_pre; i++) {
-    int nblock = i / THREADS_PER_BLOCK_NMS;
-    int inblock = i % THREADS_PER_BLOCK_NMS;
-    if (!(host_remv[nblock] &
-          (1ULL << inblock))) {  // Meeting this requirement indicates that it does not conflict
-                                 // with any of the boxes selected earlier
-      host_keep_data[num_to_keep++] = i;  // Add the satisfied box
-      for (int j = nblock; j < col_blocks;
-           j++) {  // Consider boxes with index i that overlap beyond the threshold
-        host_remv[j] |= host_mask[i * col_blocks + j];
-      }
-    }
-  }
-
-  if (cudaSuccess != cudaGetLastError()) {
-    RCLCPP_ERROR(
-      rclcpp::get_logger("iou3d_nms"), "Error: %s", cudaGetErrorString(cudaGetLastError()));
-  }
-  return num_to_keep;
-}
diff --git a/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp b/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp
deleted file mode 100644
index 7428c997e94e9..0000000000000
--- a/perception/autoware_tensorrt_bevdet/src/nvjpegdecoder.cpp
+++ /dev/null
@@ -1,299 +0,0 @@
-// Copyright 2024 AutoCore, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#ifdef __HAVE_NVJPEG__
-
-#include "autoware/tensorrt_bevdet/nvjpegdecoder.hpp"
-
-#include <chrono>
-
-using std::chrono::duration;
-using std::chrono::high_resolution_clock;
-
-int dev_malloc(void ** p, size_t s)
-{
-  return static_cast<int>(cudaMalloc(p, s));
-}
-
-int dev_free(void * p)
-{
-  return static_cast<int>(cudaFree(p));
-}
-
-int host_malloc(void ** p, size_t s, unsigned int f)
-{
-  return static_cast<int>(cudaHostAlloc(p, s, f));
-}
-
-int host_free(void * p)
-{
-  return static_cast<int>(cudaFreeHost(p));
-}
-
-int prepare_buffers(
-  const std::vector<std::vector<char>> & file_data, std::vector<int> & img_width,
-  std::vector<int> & img_height, std::vector<nvjpegImage_t> & ibuf,
-  std::vector<nvjpegImage_t> & isz, share_params & share_param)
-{
-  int widths[NVJPEG_MAX_COMPONENT];
-  int heights[NVJPEG_MAX_COMPONENT];
-  int channels;
-  nvjpegChromaSubsampling_t subsampling;
-
-  for (size_t i = 0; i < file_data.size(); i++) {
-    CHECK_NVJPEG(nvjpegGetImageInfo(
-      share_param.nvjpeg_handle, reinterpret_cast<uchar *>(file_data[i].data()),
-      file_data[i].size(), &channels, &subsampling, widths, heights));
-
-    int mul = 1;
-    // in the case of interleaved RGB output, write only to single channel, but
-    // 3 samples at once
-    if (share_param.fmt == NVJPEG_OUTPUT_RGBI || share_param.fmt == NVJPEG_OUTPUT_BGRI) {
-      channels = 1;
-      mul = 3;
-    } else if (share_param.fmt == NVJPEG_OUTPUT_RGB || share_param.fmt == NVJPEG_OUTPUT_BGR) {
-      // in the case of rgb create 3 buffers with sizes of original image
-      channels = 3;
-      widths[1] = widths[2] = widths[0];
-      heights[1] = heights[2] = heights[0];
-    }
-
-    if (img_width[i] != widths[0] || img_height[i] != heights[0]) {
-      img_width[i] = widths[0];
-      img_height[i] = heights[0];
-      // realloc output buffer if required
-      for (int c = 0; c < channels; c++) {
-        int aw = mul * widths[c];
-        int ah = heights[c];
-        size_t sz = aw * ah;
-        ibuf[i].pitch[c] = aw;
-        if (sz > isz[i].pitch[c]) {
-          if (ibuf[i].channel[c]) {
-            CHECK_CUDA(cudaFree(ibuf[i].channel[c]));
-          }
-          CHECK_CUDA(cudaMalloc(reinterpret_cast<void **>(&ibuf[i].channel[c]), sz));
-          isz[i].pitch[c] = sz;
-        }
-      }
-    }
-  }
-  return EXIT_SUCCESS;
-}
-
-void create_decoupled_api_handles(std::vector<decode_params_t> & params, share_params & share_param)
-{
-  for (size_t i = 0; i < params.size(); i++) {
-    CHECK_NVJPEG(nvjpegDecoderCreate(
-      share_param.nvjpeg_handle, NVJPEG_BACKEND_DEFAULT, &params[i].nvjpeg_decoder));
-    CHECK_NVJPEG(nvjpegDecoderStateCreate(
-      share_param.nvjpeg_handle, params[i].nvjpeg_decoder, &params[i].nvjpeg_decoupled_state));
-
-    CHECK_NVJPEG(
-      nvjpegBufferPinnedCreate(share_param.nvjpeg_handle, NULL, &params[i].pinned_buffers[0]));
-    CHECK_NVJPEG(
-      nvjpegBufferPinnedCreate(share_param.nvjpeg_handle, NULL, &params[i].pinned_buffers[1]));
-    CHECK_NVJPEG(
-      nvjpegBufferDeviceCreate(share_param.nvjpeg_handle, NULL, &params[i].device_buffer));
-
-    CHECK_NVJPEG(nvjpegJpegStreamCreate(share_param.nvjpeg_handle, &params[i].jpeg_streams[0]));
-    CHECK_NVJPEG(nvjpegJpegStreamCreate(share_param.nvjpeg_handle, &params[i].jpeg_streams[1]));
-
-    CHECK_NVJPEG(
-      nvjpegDecodeParamsCreate(share_param.nvjpeg_handle, &params[i].nvjpeg_decode_params));
-  }
-}
-
-void destroy_decoupled_api_handles(
-  std::vector<decode_params_t> & params, share_params & share_param)
-{
-  for (size_t i = 0; i < params.size(); i++) {
-    CHECK_NVJPEG(nvjpegDecodeParamsDestroy(params[i].nvjpeg_decode_params));
-
-    CHECK_NVJPEG(nvjpegJpegStreamDestroy(params[i].jpeg_streams[0]));
-    CHECK_NVJPEG(nvjpegJpegStreamDestroy(params[i].jpeg_streams[1]));
-    CHECK_NVJPEG(nvjpegBufferPinnedDestroy(params[i].pinned_buffers[0]));
-    CHECK_NVJPEG(nvjpegBufferPinnedDestroy(params[i].pinned_buffers[1]));
-    CHECK_NVJPEG(nvjpegBufferDeviceDestroy(params[i].device_buffer));
-
-    CHECK_NVJPEG(nvjpegJpegStateDestroy(params[i].nvjpeg_decoupled_state));
-    CHECK_NVJPEG(nvjpegDecoderDestroy(params[i].nvjpeg_decoder));
-  }
-}
-
-void release_buffers(std::vector<nvjpegImage_t> & ibuf)
-{
-  for (size_t i = 0; i < ibuf.size(); i++) {
-    for (size_t c = 0; c < NVJPEG_MAX_COMPONENT; c++)
-      if (ibuf[i].channel[c]) CHECK_CUDA(cudaFree(ibuf[i].channel[c]));
-  }
-}
-
-int get_img(
-  const uchar * d_chanR, int pitchR, const uchar * d_chanG, int pitchG, const uchar * d_chanB,
-  int pitchB, size_t width, size_t height, uchar * img)
-{
-  size_t step = width * height;
-
-  CHECK_CUDA(cudaMemcpy2D(
-    img + 0 * step, static_cast<size_t>(width), d_chanR, static_cast<size_t>(pitchR), width, height,
-    cudaMemcpyDeviceToDevice));
-  CHECK_CUDA(cudaMemcpy2D(
-    img + 1 * step, static_cast<size_t>(width), d_chanG, static_cast<size_t>(pitchG), width, height,
-    cudaMemcpyDeviceToDevice));
-  CHECK_CUDA(cudaMemcpy2D(
-    img + 2 * step, static_cast<size_t>(width), d_chanB, static_cast<size_t>(pitchB), width, height,
-    cudaMemcpyDeviceToDevice));
-
-  return EXIT_SUCCESS;
-}
-
-void decode_single_image(
-  const std::vector<char> & img_data, nvjpegImage_t & out, decode_params_t & params,
-  share_params & share_param, double & time)
-{
-  CHECK_CUDA(cudaStreamSynchronize(params.stream));
-  cudaEvent_t startEvent = NULL, stopEvent = NULL;
-  float loopTime = 0;
-  // cudaEventBlockingSync
-  CHECK_CUDA(cudaEventCreate(&startEvent));
-  CHECK_CUDA(cudaEventCreate(&stopEvent));
-
-  CHECK_CUDA(cudaEventRecord(startEvent, params.stream));
-
-  CHECK_NVJPEG(nvjpegStateAttachDeviceBuffer(params.nvjpeg_decoupled_state, params.device_buffer));
-  int buffer_index = 0;
-  CHECK_NVJPEG(nvjpegDecodeParamsSetOutputFormat(params.nvjpeg_decode_params, share_param.fmt));
-
-  CHECK_NVJPEG(nvjpegJpegStreamParse(
-    share_param.nvjpeg_handle, reinterpret_cast<const uchar *>(img_data.data()), img_data.size(), 0,
-    0, params.jpeg_streams[buffer_index]));
-
-  CHECK_NVJPEG(nvjpegStateAttachPinnedBuffer(
-    params.nvjpeg_decoupled_state, params.pinned_buffers[buffer_index]));
-
-  CHECK_NVJPEG(nvjpegDecodeJpegHost(
-    share_param.nvjpeg_handle, params.nvjpeg_decoder, params.nvjpeg_decoupled_state,
-    params.nvjpeg_decode_params, params.jpeg_streams[buffer_index]));
-
-  CHECK_CUDA(cudaStreamSynchronize(params.stream));
-
-  CHECK_NVJPEG(nvjpegDecodeJpegTransferToDevice(
-    share_param.nvjpeg_handle, params.nvjpeg_decoder, params.nvjpeg_decoupled_state,
-    params.jpeg_streams[buffer_index], params.stream));
-
-  buffer_index = 1 - buffer_index;  // switch pinned buffer in pipeline mode to avoid an extra sync
-  RCLCPP_DEBUG(rclcpp::get_logger("nvjpegDecoder"), buffer_index);
-
-  CHECK_NVJPEG(nvjpegDecodeJpegDevice(
-    share_param.nvjpeg_handle, params.nvjpeg_decoder, params.nvjpeg_decoupled_state, &out,
-    params.stream));
-
-  // CHECK_CUDA(cudaStreamSynchronize(params.stream)); //TODO new add
-
-  CHECK_CUDA(cudaEventRecord(stopEvent, params.stream));
-
-  CHECK_CUDA(cudaEventSynchronize(stopEvent));
-  CHECK_CUDA(cudaEventElapsedTime(&loopTime, startEvent, stopEvent));
-  time = 0.001 * static_cast<double>(loopTime);  // cudaEventElapsedTime returns milliseconds
-                                                 // time = 1.0;
-}
-
-nvjpegDecoder::nvjpegDecoder(size_t n, size_t _fmt)
-: N_img(n), iout(n), isz(n), widths(n), heights(n), params(n), share_param(_fmt)
-{
-  init();
-}
-
-int nvjpegDecoder::decode(const std::vector<std::vector<char>> & files_data, uchar * out_imgs)
-{
-  for (size_t i = 0; i < params.size(); i++) {
-    CHECK_CUDA(cudaStreamCreate(&params[i].stream));
-  }
-  if (prepare_buffers(files_data, widths, heights, iout, isz, share_param)) return EXIT_FAILURE;
-
-  double times[6];
-
-  auto decode_start = high_resolution_clock::now();
-  std::vector<std::thread> threads(files_data.size());
-  for (size_t i = 0; i < files_data.size(); i++) {
-    threads[i] = std::thread(
-      decode_single_image, std::ref(files_data[i]), std::ref(iout[i]), std::ref(params[i]),
-      std::ref(share_param), std::ref(times[i]));
-  }
-  for (size_t i = 0; i < files_data.size(); i++) {
-    threads[i].join();
-  }
-  auto decode_end = high_resolution_clock::now();
-  duration<double> decode_time = decode_end - decode_start;
-
-  RCLCPP_INFO(
-    rclcpp::get_logger("nvjpegDecoder"), "Decode total time : %.4lf ms",
-    decode_time.count() * 1000);
-
-  for (size_t i = 0; i < files_data.size(); i++) {
-    get_img(
-      iout[i].channel[0], iout[i].pitch[0], iout[i].channel[1], iout[i].pitch[1],
-      iout[i].channel[2], iout[i].pitch[2], widths[i], heights[i],
-      out_imgs + i * 3 * widths[i] * heights[i]);
-  }
-
-  for (size_t i = 0; i < params.size(); i++) {
-    CHECK_CUDA(cudaStreamDestroy(params[i].stream));
-  }
-  return EXIT_SUCCESS;
-}
-
-int nvjpegDecoder::init()
-{
-  nvjpegDevAllocator_t dev_allocator = {&dev_malloc, &dev_free};
-  nvjpegPinnedAllocator_t pinned_allocator = {&host_malloc, &host_free};
-
-  nvjpegStatus_t status = nvjpegCreateEx(
-    NVJPEG_BACKEND_HARDWARE, &dev_allocator, &pinned_allocator, NVJPEG_FLAGS_DEFAULT,
-    &share_param.nvjpeg_handle);
-  share_param.hw_decode_available = true;
-  if (status == NVJPEG_STATUS_ARCH_MISMATCH) {
-    RCLCPP_WARN(
-      rclcpp::get_logger("nvjpegDecoder"),
-      "Hardware Decoder not supported. Falling back to default backend");
-    CHECK_NVJPEG(nvjpegCreateEx(
-      NVJPEG_BACKEND_DEFAULT, &dev_allocator, &pinned_allocator, NVJPEG_FLAGS_DEFAULT,
-      &share_param.nvjpeg_handle));
-    share_param.hw_decode_available = false;
-  } else {
-    CHECK_NVJPEG(status);
-  }
-
-  CHECK_NVJPEG(nvjpegJpegStateCreate(share_param.nvjpeg_handle, &share_param.nvjpeg_state));
-
-  create_decoupled_api_handles(params, share_param);
-
-  for (size_t i = 0; i < iout.size(); i++) {
-    for (size_t c = 0; c < NVJPEG_MAX_COMPONENT; c++) {
-      iout[i].channel[c] = NULL;
-      iout[i].pitch[c] = 0;
-      isz[i].pitch[c] = 0;
-    }
-  }
-  return EXIT_SUCCESS;
-}
-
-nvjpegDecoder::~nvjpegDecoder()
-{
-  release_buffers(iout);
-  destroy_decoupled_api_handles(params, share_param);
-  CHECK_NVJPEG(nvjpegJpegStateDestroy(share_param.nvjpeg_state));
-  CHECK_NVJPEG(nvjpegDestroy(share_param.nvjpeg_handle));
-}
-
-#endif
diff --git a/perception/autoware_tensorrt_bevdet/src/postprocess.cu b/perception/autoware_tensorrt_bevdet/src/postprocess.cu
deleted file mode 100644
index c7ac66a3236e6..0000000000000
--- a/perception/autoware_tensorrt_bevdet/src/postprocess.cu
+++ /dev/null
@@ -1,212 +0,0 @@
-// Copyright 2024 AutoCore, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "autoware/tensorrt_bevdet/postprocess.hpp"
-
-#include <thrust/device_vector.h>
-#include <thrust/host_vector.h>
-#include <thrust/sequence.h>
-#include <thrust/sort.h>
-#include <thrust/transform.h>
-
-__device__ float sigmoid_gpu(const float x)
-{
-  return 1.0f / (1.0f + expf(-x));
-}
-
-__global__ void BEVDecodeObjectKernel(
-  const int map_size,        // 40000
-  const float score_thresh,  // 0.1
-                             //    const int nms_pre_max_size, // 4096
-  const float x_start, const float y_start, const float x_step, const float y_step,
-  const int output_h, const int output_w, const int downsample_size, const int num_class_in_task,
-  const int cls_range, const float * reg, const float * hei, const float * dim, const float * rot,
-  const float * vel, const float * cls, float * res_box, float * res_conf, int * res_cls,
-  int * res_box_num, float * rescale_factor)
-{  // According to the confidence level, the initial screening showed that there were res-box_num
-   // boxes after screening, not exceeding nms_pre_max_size 4096
-  int idx = threadIdx.x + blockDim.x * blockIdx.x;
-  if (idx >= map_size) return;
-
-  float max_score = cls[idx];  // Initialize as the first class of the task
-  int label = cls_range;       // Initialize as the first class of the task
-  for (int i = 1; i < num_class_in_task; ++i) {
-    float cur_score = cls[idx + i * map_size];
-    if (cur_score > max_score) {
-      max_score = cur_score;
-      label = i + cls_range;
-    }
-  }
-
-  int coor_x = idx % output_h;  //
-  int coor_y = idx / output_w;  //
-
-  float conf = sigmoid_gpu(max_score);  // Calculate confidence level
-  if (conf > score_thresh) {
-    int cur_valid_box_id = atomicAdd(res_box_num, 1);
-    res_box[cur_valid_box_id * kBoxBlockSize + 0] =
-      (reg[idx + 0 * map_size] + coor_x) * x_step + x_start;
-    res_box[cur_valid_box_id * kBoxBlockSize + 1] =
-      (reg[idx + 1 * map_size] + coor_y) * y_step + y_start;
-    res_box[cur_valid_box_id * kBoxBlockSize + 2] = hei[idx];
-    res_box[cur_valid_box_id * kBoxBlockSize + 3] =
-      expf(dim[idx + 0 * map_size]) * rescale_factor[label];  // nms scale
-    res_box[cur_valid_box_id * kBoxBlockSize + 4] =
-      expf(dim[idx + 1 * map_size]) * rescale_factor[label];
-    res_box[cur_valid_box_id * kBoxBlockSize + 5] =
-      expf(dim[idx + 2 * map_size]) * rescale_factor[label];
-    res_box[cur_valid_box_id * kBoxBlockSize + 6] = atan2f(rot[idx], rot[idx + map_size]);
-    res_box[cur_valid_box_id * kBoxBlockSize + 7] = vel[idx];
-    res_box[cur_valid_box_id * kBoxBlockSize + 8] = vel[idx + map_size];
-
-    res_conf[cur_valid_box_id] = conf;
-    res_cls[cur_valid_box_id] = label;
-  }
-}
-
-PostprocessGPU::PostprocessGPU(
-  const int _class_num, const float _score_thresh, const float _nms_thresh,
-  const int _nms_pre_maxnum, const int _nms_post_maxnum, const int _down_sample,
-  const int _output_h, const int _output_w, const float _x_step, const float _y_step,
-  const float _x_start, const float _y_start, const std::vector<int> & _class_num_pre_task,
-  const std::vector<float> & _nms_rescale_factor)
-: class_num(_class_num),
-  score_thresh(_score_thresh),
-  nms_thresh(_nms_thresh),
-  nms_pre_maxnum(_nms_pre_maxnum),
-  nms_post_maxnum(_nms_post_maxnum),
-  down_sample(_down_sample),
-  output_h(_output_h),
-  output_w(_output_w),
-  x_step(_x_step),
-  y_step(_y_step),
-  x_start(_x_start),
-  y_start(_y_start),
-  map_size(output_h * output_w),
-  class_num_pre_task(_class_num_pre_task),
-  nms_rescale_factor(_nms_rescale_factor),
-  task_num(_class_num_pre_task.size())
-{
-  CHECK_CUDA(cudaMalloc((void **)&boxes_dev, kBoxBlockSize * map_size * sizeof(float)));
-  CHECK_CUDA(cudaMalloc((void **)&score_dev, map_size * sizeof(float)));
-  CHECK_CUDA(cudaMalloc((void **)&cls_dev, map_size * sizeof(int)));
-  CHECK_CUDA(cudaMalloc((void **)&sorted_indices_dev, map_size * sizeof(int)));
-  CHECK_CUDA(cudaMalloc((void **)&valid_box_num, sizeof(int)));
-  CHECK_CUDA(cudaMalloc((void **)&nms_rescale_factor_dev, class_num * sizeof(float)));
-
-  CHECK_CUDA(cudaMallocHost((void **)&boxes_host, kBoxBlockSize * map_size * sizeof(float)));
-  CHECK_CUDA(cudaMallocHost((void **)&score_host, nms_pre_maxnum * sizeof(float)));
-  CHECK_CUDA(cudaMallocHost((void **)&cls_host, map_size * sizeof(float)));
-  CHECK_CUDA(cudaMallocHost((void **)&sorted_indices_host, nms_pre_maxnum * sizeof(int)));
-  CHECK_CUDA(cudaMallocHost((void **)&keep_data_host, nms_pre_maxnum * sizeof(std::int64_t)));
-
-  CHECK_CUDA(cudaMemcpy(
-    nms_rescale_factor_dev, nms_rescale_factor.data(), class_num * sizeof(float),
-    cudaMemcpyHostToDevice));
-
-  iou3d_nms.reset(new Iou3dNmsCuda(output_h, output_w, nms_thresh));
-
-  std::ostringstream oss;
-  for (auto i = 0; i < nms_rescale_factor.size(); i++) {
-    oss << std::fixed << std::setprecision(2) << nms_rescale_factor[i] << ' ';
-  }
-  RCLCPP_INFO(rclcpp::get_logger("PostprocessGPU"), "%s", oss.str().c_str());
-}
-PostprocessGPU::~PostprocessGPU()
-{
-  CHECK_CUDA(cudaFree(boxes_dev));
-  CHECK_CUDA(cudaFree(score_dev));
-  CHECK_CUDA(cudaFree(cls_dev));
-  CHECK_CUDA(cudaFree(sorted_indices_dev));
-  CHECK_CUDA(cudaFree(valid_box_num));
-  CHECK_CUDA(cudaFree(nms_rescale_factor_dev));
-
-  CHECK_CUDA(cudaFreeHost(boxes_host));
-  CHECK_CUDA(cudaFreeHost(score_host));
-  CHECK_CUDA(cudaFreeHost(cls_host));
-  CHECK_CUDA(cudaFreeHost(sorted_indices_host));
-  CHECK_CUDA(cudaFreeHost(keep_data_host));
-}
-
-void PostprocessGPU::DoPostprocess(void ** const bev_buffer, std::vector<Box> & out_detections)
-{
-  // bev_buffer : BEV_feat, reg_0, hei_0, dim_0, rot_0, vel_0, heatmap_0, reg_1 ...
-  const int task_num = class_num_pre_task.size();
-  int cur_start_label = 0;
-  for (int i = 0; i < task_num; i++) {
-    float * reg = (float *)bev_buffer[i * 6 + 0];      // 2 x 128 x 128
-    float * hei = (float *)bev_buffer[i * 6 + 1];      // 1 x 128 x 128
-    float * dim = (float *)bev_buffer[i * 6 + 2];      // 3 x 128 x 128
-    float * rot = (float *)bev_buffer[i * 6 + 3];      // 2 x 128 x 128
-    float * vel = (float *)bev_buffer[i * 6 + 4];      // 2 x 128 x 128
-    float * heatmap = (float *)bev_buffer[i * 6 + 5];  // c x 128 x 128
-
-    dim3 grid(DIVUP(map_size, NUM_THREADS));
-    CHECK_CUDA(cudaMemset(valid_box_num, 0, sizeof(int)));
-    BEVDecodeObjectKernel<<<grid, NUM_THREADS>>>(
-      map_size, score_thresh, x_start, y_start, x_step, y_step, output_h, output_w, down_sample,
-      class_num_pre_task[i], cur_start_label, reg, hei, dim, rot, vel, heatmap, boxes_dev,
-      score_dev, cls_dev, valid_box_num, nms_rescale_factor_dev);
-
-    /*
-    at this point, boxes_dev, score_dev, cls_dev have valid_box_num elements，which may be greater
-    than nms_pre_maxnum, and it's arranged in disorder
-    */
-    int box_num_pre = 0;
-    CHECK_CUDA(cudaMemcpy(&box_num_pre, valid_box_num, sizeof(int), cudaMemcpyDeviceToHost));
-
-    thrust::sequence(thrust::device, sorted_indices_dev, sorted_indices_dev + box_num_pre);
-    thrust::sort_by_key(
-      thrust::device, score_dev, score_dev + box_num_pre, sorted_indices_dev,
-      thrust::greater<float>());
-    // score_dev is sorted in descending order, while sorted_indices_dev indexes the original
-    // order,， sorted_indices_dev[i] = j; i:current，j:pre-index;  j:[0, map_size]
-
-    box_num_pre = std::min(box_num_pre, nms_pre_maxnum);
-
-    int box_num_post =
-      iou3d_nms->DoIou3dNms(box_num_pre, boxes_dev, sorted_indices_dev, keep_data_host);
-
-    box_num_post = std::min(box_num_post, nms_post_maxnum);
-
-    CHECK_CUDA(cudaMemcpy(
-      sorted_indices_host, sorted_indices_dev, box_num_pre * sizeof(int), cudaMemcpyDeviceToHost));
-    CHECK_CUDA(cudaMemcpy(
-      boxes_host, boxes_dev, kBoxBlockSize * map_size * sizeof(float), cudaMemcpyDeviceToHost));
-    CHECK_CUDA(
-      cudaMemcpy(score_host, score_dev, box_num_pre * sizeof(float), cudaMemcpyDeviceToHost));
-    CHECK_CUDA(cudaMemcpy(cls_host, cls_dev, map_size * sizeof(float), cudaMemcpyDeviceToHost));
-
-    for (auto j = 0; j < box_num_post; j++) {
-      int k = keep_data_host[j];
-      int idx = sorted_indices_host[k];
-      Box box;
-      box.x = boxes_host[idx * kBoxBlockSize + 0];
-      box.y = boxes_host[idx * kBoxBlockSize + 1];
-      box.z = boxes_host[idx * kBoxBlockSize + 2];
-      box.l = boxes_host[idx * kBoxBlockSize + 3] / nms_rescale_factor[cls_host[idx]];
-      box.w = boxes_host[idx * kBoxBlockSize + 4] / nms_rescale_factor[cls_host[idx]];
-      box.h = boxes_host[idx * kBoxBlockSize + 5] / nms_rescale_factor[cls_host[idx]];
-      box.r = boxes_host[idx * kBoxBlockSize + 6];
-      box.vx = boxes_host[idx * kBoxBlockSize + 7];
-      box.vy = boxes_host[idx * kBoxBlockSize + 8];
-
-      box.label = cls_host[idx];
-      box.score = score_host[k];
-      box.z -= box.h * 0.5;  // bottom height
-      out_detections.push_back(box);
-    }
-
-    cur_start_label += class_num_pre_task[i];
-  }
-}
diff --git a/perception/autoware_tensorrt_bevdet/src/preprocess.cu b/perception/autoware_tensorrt_bevdet/src/preprocess.cu
deleted file mode 100644
index 2a1bdc426a42f..0000000000000
--- a/perception/autoware_tensorrt_bevdet/src/preprocess.cu
+++ /dev/null
@@ -1,37 +0,0 @@
-// Copyright 2024 AutoCore, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "autoware/tensorrt_bevdet/preprocess.hpp"
-
-#include <thrust/fill.h>
-
-#include <fstream>
-
-__global__ void convert_RGBHWC_to_BGRCHW_kernel(
-  uchar * input, uchar * output, int channels, int height, int width)
-{
-  int index = blockIdx.x * blockDim.x + threadIdx.x;
-  if (index < channels * height * width) {
-    int y = index / 3 / width;
-    int x = index / 3 % width;
-    int c = 2 - index % 3;  // RGB to BGR
-
-    output[c * height * width + y * width + x] = input[index];
-  }
-}
-// RGBHWC to BGRCHW
-void convert_RGBHWC_to_BGRCHW(uchar * input, uchar * output, int channels, int height, int width)
-{
-  convert_RGBHWC_to_BGRCHW_kernel<<<DIVUP(channels * height * width, NUM_THREADS), NUM_THREADS>>>(
-    input, output, channels, height, width);
-}
diff --git a/perception/autoware_tensorrt_bevdet/src/preprocess_plugin.cu b/perception/autoware_tensorrt_bevdet/src/preprocess_plugin.cu
deleted file mode 100644
index 25abf687aaae8..0000000000000
--- a/perception/autoware_tensorrt_bevdet/src/preprocess_plugin.cu
+++ /dev/null
@@ -1,347 +0,0 @@
-/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "autoware/tensorrt_bevdet/common.hpp"
-#include "autoware/tensorrt_bevdet/preprocess_plugin.hpp"
-
-#include <cuda_fp16.h>
-#include <cuda_runtime_api.h>
-
-#include <iostream>
-
-// kernel for GPU
-
-template <typename T>
-__global__ void preprocess_kernel(
-  const uint8_t * src_dev, T * dst_dev, int src_row_step, int dst_row_step, int src_img_step,
-  int dst_img_step, int src_h, int src_w, float radio_h, float radio_w, float offset_h,
-  float offset_w, const float * mean, const float * std, int dst_h, int dst_w, int n)
-{
-  int idx = blockIdx.x * blockDim.x + threadIdx.x;
-  if (idx >= dst_h * dst_w * n) return;
-
-  int i = (idx / n) / dst_w;
-  int j = (idx / n) % dst_w;
-  int k = idx % n;
-
-  int pX = (int)roundf((i / radio_h) + offset_h);
-  int pY = (int)roundf((j / radio_w) + offset_w);
-
-  if (pX < src_h && pX >= 0 && pY < src_w && pY >= 0) {
-    int s1 = k * src_img_step + 0 * src_img_step / 3 + pX * src_row_step + pY;
-    int s2 = k * src_img_step + 1 * src_img_step / 3 + pX * src_row_step + pY;
-    int s3 = k * src_img_step + 2 * src_img_step / 3 + pX * src_row_step + pY;
-
-    int d1 = k * dst_img_step + 0 * dst_img_step / 3 + i * dst_row_step + j;
-    int d2 = k * dst_img_step + 1 * dst_img_step / 3 + i * dst_row_step + j;
-    int d3 = k * dst_img_step + 2 * dst_img_step / 3 + i * dst_row_step + j;
-
-    dst_dev[d1] = (static_cast<T>(src_dev[s1]) - static_cast<T>(mean[0])) / static_cast<T>(std[0]);
-    dst_dev[d2] = (static_cast<T>(src_dev[s2]) - static_cast<T>(mean[1])) / static_cast<T>(std[1]);
-    dst_dev[d3] = (static_cast<T>(src_dev[s3]) - static_cast<T>(mean[2])) / static_cast<T>(std[2]);
-  }
-}
-
-namespace nvinfer1
-{
-// class PreprocessPlugin
-PreprocessPlugin::PreprocessPlugin(
-  const std::string & name, int crop_h, int crop_w, float resize_radio)
-: name_(name)
-{
-  m_.crop_h = crop_h;
-  m_.crop_w = crop_w;
-  m_.resize_radio = resize_radio;
-}
-
-PreprocessPlugin::PreprocessPlugin(const std::string & name, const void * buffer, size_t length)
-: name_(name)
-{
-  memcpy(&m_, buffer, sizeof(m_));
-}
-
-PreprocessPlugin::~PreprocessPlugin()
-{
-}
-
-IPluginV2DynamicExt * PreprocessPlugin::clone() const noexcept
-{
-  auto p = new PreprocessPlugin(name_, &m_, sizeof(m_));
-  p->setPluginNamespace(namespace_.c_str());
-  return p;
-}
-
-int32_t PreprocessPlugin::getNbOutputs() const noexcept
-{
-  return 1;
-}
-
-DataType PreprocessPlugin::getOutputDataType(
-  int32_t index, DataType const * inputTypes, int32_t nbInputs) const noexcept
-{
-  // return DataType::kHALF;
-  return DataType::kFLOAT;
-}
-
-DimsExprs PreprocessPlugin::getOutputDimensions(
-  int32_t outputIndex, const DimsExprs * inputs, int32_t nbInputs,
-  IExprBuilder & exprBuilder) noexcept
-{
-  int input_h = inputs[0].d[2]->getConstantValue();
-  int input_w = inputs[0].d[3]->getConstantValue() * 4;
-
-  int output_h = input_h * m_.resize_radio - m_.crop_h;
-  int output_w = input_w * m_.resize_radio - m_.crop_w;
-
-  DimsExprs ret;
-  ret.nbDims = inputs[0].nbDims;
-  ret.d[0] = inputs[0].d[0];
-  ret.d[1] = inputs[0].d[1];
-  ret.d[2] = exprBuilder.constant(output_h);
-  ret.d[3] = exprBuilder.constant(output_w);
-
-  return ret;
-}
-
-bool PreprocessPlugin::supportsFormatCombination(
-  int32_t pos, const PluginTensorDesc * inOut, int32_t nbInputs, int32_t nbOutputs) noexcept
-{
-  bool res;
-  switch (pos) {
-    case 0:  // images
-      res = (inOut[0].type == DataType::kINT8 || inOut[0].type == DataType::kINT32) &&
-            inOut[0].format == TensorFormat::kLINEAR;
-      break;
-    case 1:  // mean
-      res = (inOut[1].type == DataType::kFLOAT) && inOut[1].format == TensorFormat::kLINEAR;
-      break;
-    case 2:  // std
-      res = (inOut[2].type == DataType::kFLOAT) && inOut[2].format == TensorFormat::kLINEAR;
-      break;
-    case 3:  // output img tensor
-      res = (inOut[3].type == DataType::kFLOAT || inOut[3].type == DataType::kHALF) &&
-            inOut[3].format == inOut[0].format;
-
-      // res = inOut[3].type == DataType::kHALF && inOut[3].format == inOut[0].format;
-      break;
-    default:
-      res = false;
-  }
-  return res;
-}
-
-void PreprocessPlugin::configurePlugin(
-  const DynamicPluginTensorDesc * in, int32_t nbInputs, const DynamicPluginTensorDesc * out,
-  int32_t nbOutputs) noexcept
-{
-  return;
-}
-
-size_t PreprocessPlugin::getWorkspaceSize(
-  const PluginTensorDesc * inputs, int32_t nbInputs, const PluginTensorDesc * outputs,
-  int32_t nbOutputs) const noexcept
-{
-  return 0;
-}
-
-int32_t PreprocessPlugin::enqueue(
-  const PluginTensorDesc * inputDesc, const PluginTensorDesc * outputDesc,
-  const void * const * inputs, void * const * outputs, void * workspace,
-  cudaStream_t stream) noexcept
-{
-  int n_img = inputDesc[0].dims.d[0];
-  int src_img_h = inputDesc[0].dims.d[2];
-  int src_img_w = inputDesc[0].dims.d[3] * 4;
-
-  int dst_img_h = outputDesc[0].dims.d[2];
-  int dst_img_w = outputDesc[0].dims.d[3];
-
-  int src_row_step = src_img_w;
-  int dst_row_step = dst_img_w;
-
-  int src_img_step = src_img_w * src_img_h * 3;
-  int dst_img_step = dst_img_w * dst_img_h * 3;
-
-  float offset_h = m_.crop_h / m_.resize_radio;
-  float offset_w = m_.crop_w / m_.resize_radio;
-
-  dim3 grid(DIVUP(dst_img_h * dst_img_w * n_img, NUM_THREADS));
-  dim3 block(NUM_THREADS);
-
-  switch (int(outputDesc[0].type)) {
-    case int(DataType::kFLOAT):
-      // float
-      preprocess_kernel<<<grid, block, 0, stream>>>(
-        reinterpret_cast<const uint8_t *>(inputs[0]), reinterpret_cast<float *>(outputs[0]),
-        src_row_step, dst_row_step, src_img_step, dst_img_step, src_img_h, src_img_w,
-        m_.resize_radio, m_.resize_radio, offset_h, offset_w,
-        reinterpret_cast<const float *>(inputs[1]), reinterpret_cast<const float *>(inputs[2]),
-        dst_img_h, dst_img_w, n_img);
-      break;
-    case int(DataType::kHALF):
-      // half
-      preprocess_kernel<<<grid, block, 0, stream>>>(
-        reinterpret_cast<const uint8_t *>(inputs[0]), reinterpret_cast<__half *>(outputs[0]),
-        src_row_step, dst_row_step, src_img_step, dst_img_step, src_img_h, src_img_w,
-        m_.resize_radio, m_.resize_radio, offset_h, offset_w,
-        reinterpret_cast<const float *>(inputs[1]), reinterpret_cast<const float *>(inputs[2]),
-        dst_img_h, dst_img_w, n_img);
-
-      break;
-    default:  // should NOT be here
-      RCLCPP_ERROR(rclcpp::get_logger("PreprocessPlugin"), "\tUnsupport datatype!");
-  }
-  return 0;
-}
-
-void PreprocessPlugin::destroy() noexcept
-{
-  delete this;
-  return;
-}
-
-int32_t PreprocessPlugin::initialize() noexcept
-{
-  return 0;
-}
-
-void PreprocessPlugin::terminate() noexcept
-{
-  return;
-}
-
-size_t PreprocessPlugin::getSerializationSize() const noexcept
-{
-  return sizeof(m_);
-}
-
-void PreprocessPlugin::serialize(void * buffer) const noexcept
-{
-  memcpy(buffer, &m_, sizeof(m_));
-  return;
-}
-
-void PreprocessPlugin::setPluginNamespace(const char * pluginNamespace) noexcept
-{
-  namespace_ = pluginNamespace;
-  return;
-}
-
-const char * PreprocessPlugin::getPluginNamespace() const noexcept
-{
-  return namespace_.c_str();
-}
-
-const char * PreprocessPlugin::getPluginType() const noexcept
-{
-  return PRE_PLUGIN_NAME;
-}
-
-const char * PreprocessPlugin::getPluginVersion() const noexcept
-{
-  return PRE_PLUGIN_VERSION;
-}
-
-void PreprocessPlugin::attachToContext(
-  cudnnContext * contextCudnn, cublasContext * contextCublas, IGpuAllocator * gpuAllocator) noexcept
-{
-  return;
-}
-
-void PreprocessPlugin::detachFromContext() noexcept
-{
-  return;
-}
-
-// class PreprocessPluginCreator
-PluginFieldCollection PreprocessPluginCreator::fc_{};
-std::vector<PluginField> PreprocessPluginCreator::attr_;
-
-PreprocessPluginCreator::PreprocessPluginCreator()
-{
-  attr_.clear();
-  attr_.emplace_back(PluginField("crop_h", nullptr, PluginFieldType::kINT32, 1));
-  attr_.emplace_back(PluginField("crop_w", nullptr, PluginFieldType::kINT32, 1));
-  attr_.emplace_back(PluginField("resize_radio", nullptr, PluginFieldType::kFLOAT32, 1));
-
-  fc_.nbFields = attr_.size();
-  fc_.fields = attr_.data();
-}
-
-PreprocessPluginCreator::~PreprocessPluginCreator()
-{
-}
-
-IPluginV2DynamicExt * PreprocessPluginCreator::createPlugin(
-  const char * name, const PluginFieldCollection * fc) noexcept
-{
-  const PluginField * fields = fc->fields;
-
-  int crop_h = -1;
-  int crop_w = -1;
-  float resize_radio = 0.f;
-
-  for (int i = 0; i < fc->nbFields; ++i) {
-    if (std::string(fc->fields[i].name) == std::string("crop_h")) {
-      crop_h = *reinterpret_cast<const int *>(fc->fields[i].data);
-    } else if (std::string(fc->fields[i].name) == std::string("crop_w")) {
-      crop_w = *reinterpret_cast<const int *>(fc->fields[i].data);
-    } else if (std::string(fc->fields[i].name) == std::string("resize_radio")) {
-      resize_radio = *reinterpret_cast<const float *>(fc->fields[i].data);
-    }
-  }
-  PreprocessPlugin * pObj = new PreprocessPlugin(name, crop_h, crop_w, resize_radio);
-  pObj->setPluginNamespace(namespace_.c_str());
-  return pObj;
-}
-
-IPluginV2DynamicExt * PreprocessPluginCreator::deserializePlugin(
-  const char * name, const void * serialData, size_t serialLength) noexcept
-{
-  PreprocessPlugin * pObj = new PreprocessPlugin(name, serialData, serialLength);
-  pObj->setPluginNamespace(namespace_.c_str());
-  return pObj;
-}
-
-void PreprocessPluginCreator::setPluginNamespace(const char * pluginNamespace) noexcept
-{
-  namespace_ = pluginNamespace;
-  return;
-}
-
-const char * PreprocessPluginCreator::getPluginNamespace() const noexcept
-{
-  return namespace_.c_str();
-}
-
-const char * PreprocessPluginCreator::getPluginName() const noexcept
-{
-  return PRE_PLUGIN_NAME;
-}
-
-const char * PreprocessPluginCreator::getPluginVersion() const noexcept
-{
-  return PRE_PLUGIN_VERSION;
-}
-
-const PluginFieldCollection * PreprocessPluginCreator::getFieldNames() noexcept
-{
-  return &fc_;
-}
-
-REGISTER_TENSORRT_PLUGIN(PreprocessPluginCreator);
-
-}  // namespace nvinfer1
diff --git a/perception/autoware_tensorrt_bevdet/src/ros_utils.cpp b/perception/autoware_tensorrt_bevdet/src/ros_utils.cpp
new file mode 100644
index 0000000000000..d2a78f32722f4
--- /dev/null
+++ b/perception/autoware_tensorrt_bevdet/src/ros_utils.cpp
@@ -0,0 +1,117 @@
+// Copyright 2024 AutoCore, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "autoware/tensorrt_bevdet/ros_utils.hpp"
+#include <autoware_perception_msgs/msg/detected_object_kinematics.hpp>
+
+#include <autoware_perception_msgs/msg/object_classification.hpp>
+#include <autoware_perception_msgs/msg/shape.hpp>
+
+namespace autoware::tensorrt_bevdet
+{
+using Label = autoware_perception_msgs::msg::ObjectClassification;
+
+uint8_t getSemanticType(const std::string & class_name)
+{
+  if (class_name == "car") {
+    return Label::CAR;
+  } else if (class_name == "truck") {
+    return Label::TRUCK;
+  } else if (class_name == "bus") {
+    return Label::BUS;
+  } else if (class_name == "trailer") {
+    return Label::TRAILER;
+  } else if (class_name == "bicycle") {
+    return Label::BICYCLE;
+  } else if (class_name == "motorcycle") {
+    return Label::MOTORCYCLE;
+  } else if (class_name == "pedestrian") {
+    return Label::PEDESTRIAN;
+  } else {
+    return Label::UNKNOWN;
+  }
+}
+
+void box3DToDetectedObjects(
+  const std::vector<Box> & boxes, autoware_perception_msgs::msg::DetectedObjects & bevdet_objects,
+  const std::vector<std::string> & class_names, float score_thre, const bool has_twist = true)
+{
+  for (auto b : boxes) {
+    if (b.score < score_thre) continue;
+    autoware_perception_msgs::msg::DetectedObject obj;
+
+    Eigen::Vector3f center(b.x, b.y, b.z + b.h / 2.);
+
+    obj.existence_probability = b.score;
+    // classification
+    autoware_perception_msgs::msg::ObjectClassification classification;
+    classification.probability = 1.0f;
+    if (b.label >= 0 && static_cast<size_t>(b.label) < class_names.size()) {
+      classification.label = getSemanticType(class_names[b.label]);
+    } else {
+      classification.label = Label::UNKNOWN;
+    }
+    obj.classification.emplace_back(classification);
+
+    // pose and shape
+    geometry_msgs::msg::Point p;
+    p.x = center.x();
+    p.y = center.y();
+    p.z = center.z();
+    obj.kinematics.pose_with_covariance.pose.position = p;
+
+    tf2::Quaternion q;
+    q.setRPY(0, 0, b.r);
+    obj.kinematics.pose_with_covariance.pose.orientation = tf2::toMsg(q);
+
+    obj.shape.type = autoware_perception_msgs::msg::Shape::BOUNDING_BOX;
+
+    geometry_msgs::msg::Vector3 v;
+    v.x = b.l;
+    v.y = b.w;
+    v.z = b.h;
+    obj.shape.dimensions = v;
+    if (has_twist) {
+      float vel_x = b.vx;
+      float vel_y = b.vy;
+      geometry_msgs::msg::Twist twist;
+      twist.linear.x = std::sqrt(std::pow(vel_x, 2) + std::pow(vel_y, 2));
+      twist.angular.z = 2 * (std::atan2(vel_y, vel_x) - b.r);
+      obj.kinematics.twist_with_covariance.twist = twist;
+      obj.kinematics.has_twist = has_twist;
+    }
+
+    bevdet_objects.objects.emplace_back(obj);
+  }
+}
+
+void getTransform(
+  const geometry_msgs::msg::TransformStamped & transform, Eigen::Quaternion<float> & rot,
+  Eigen::Translation3f & translation)
+{
+  rot = Eigen::Quaternion<float>(
+    transform.transform.rotation.w, transform.transform.rotation.x, transform.transform.rotation.y,
+    transform.transform.rotation.z);
+  translation = Eigen::Translation3f(
+    transform.transform.translation.x, transform.transform.translation.y,
+    transform.transform.translation.z);
+}
+
+void getCameraIntrinsics(
+  const sensor_msgs::msg::CameraInfo::SharedPtr msg, Eigen::Matrix3f & intrinsics)
+{
+  intrinsics << msg->k[0], msg->k[1], msg->k[2], msg->k[3], msg->k[4], msg->k[5], msg->k[6],
+    msg->k[7], msg->k[8];
+}
+} // namespace autoware::tensorrt_bevdet
\ No newline at end of file

From b1543578ed1b1d56a4375baa762c265d496b92fd Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 19 Nov 2024 06:56:25 +0000
Subject: [PATCH 78/85] style(pre-commit): autofix

---
 perception/autoware_tensorrt_bevdet/README.md       |  4 +++-
 .../autoware/tensorrt_bevdet/bevdet_node.hpp        | 13 +++++++------
 .../include/autoware/tensorrt_bevdet/ros_utils.hpp  | 11 ++++++-----
 perception/autoware_tensorrt_bevdet/package.xml     |  4 ++--
 .../autoware_tensorrt_bevdet/src/bevdet_node.cpp    |  7 ++++---
 .../autoware_tensorrt_bevdet/src/ros_utils.cpp      |  4 ++--
 6 files changed, 24 insertions(+), 19 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/README.md b/perception/autoware_tensorrt_bevdet/README.md
index fc508d56699e7..a4a181aa24886 100644
--- a/perception/autoware_tensorrt_bevdet/README.md
+++ b/perception/autoware_tensorrt_bevdet/README.md
@@ -7,10 +7,12 @@ The core algorithm, named `BEVDet`, it unifies multi-view images into the perspe
 ## Inner-workings / Algorithms
 
 ### Cite
+
 <!-- cspell: ignore Junjie Huang, Guan Huang -->
+
 - Junjie Huang, Guan Huang, "BEVPoolv2: A Cutting-edge Implementation of BEVDet Toward Deployment", [[ref](https://arxiv.org/pdf/2211.17111)]
 - [bevdet_vendor]() package are copied from the [original codes](https://github.com/LCH1238/bevdet-tensorrt-cpp/tree/one) (The TensorRT, C++ implementation by LCH1238) and modified.
-- This package is ported version toward Autoware from [bevdet_vendor](). 
+- This package is ported version toward Autoware from [bevdet_vendor]().
 
 ## Inputs / Outputs
 
diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
index 25ef4bd730e95..2cd90cfc0180e 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
@@ -17,8 +17,6 @@
 #define AUTOWARE__TENSORRT_BEVDET__BEVDET_NODE_HPP_
 
 #include "autoware/tensorrt_bevdet/ros_utils.hpp"
-#include <cpu_jpegdecoder.h>
-
 #include "tf2_ros/buffer.h"
 #include "tf2_ros/transform_listener.h"
 
@@ -28,6 +26,7 @@
 
 #include <sensor_msgs/msg/image.hpp>
 
+#include <cpu_jpegdecoder.h>
 #include <cv_bridge/cv_bridge.h>
 #include <message_filters/subscriber.h>
 #include <message_filters/sync_policies/approximate_time.h>
@@ -68,12 +67,12 @@ class TRTBEVDetNode : public rclcpp::Node
   std::vector<std::string> imgs_name_;    ///< Names of the images
   std::vector<std::string> class_names_;  ///< Names of the object classes
 
-  camsData sampleData_;  ///< Sample data for camera parameters
+  camsData sampleData_;             ///< Sample data for camera parameters
   std::shared_ptr<BEVDet> bevdet_;  ///< Object for performing object detection
 
   uchar * imgs_dev_ = nullptr;  ///< Device pointer for storing the images
   float score_thre_;            ///< Score threshold for object detection
-  bool has_twist_ = true;  /// wether set twist for objects
+  bool has_twist_ = true;       /// wether set twist for objects
 
   rclcpp::Publisher<autoware_perception_msgs::msg::DetectedObjects>::SharedPtr
     pub_boxes_;  ///< Publisher for publishing the detected objects
@@ -92,8 +91,10 @@ class TRTBEVDetNode : public rclcpp::Node
   rclcpp::Subscription<sensor_msgs::msg::CameraInfo>::SharedPtr
     sub_br_caminfo_;  ///< Subscriber for back-right camera info
 
-  std::vector<bool> caminfo_received_;  ///< Flag indicating if camera info has been received for each camera
-  bool camera_info_received_flag_ = false;  ///< Flag indicating if camera info has been received for all cameras
+  std::vector<bool>
+    caminfo_received_;  ///< Flag indicating if camera info has been received for each camera
+  bool camera_info_received_flag_ =
+    false;                    ///< Flag indicating if camera info has been received for all cameras
   bool initialized_ = false;  /// Flag indicating if img_w_ and img_h_ has been initialized
 
   // tf listener
diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/ros_utils.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/ros_utils.hpp
index 9bfd16d70c658..7a5b400c25d2b 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/ros_utils.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/ros_utils.hpp
@@ -16,14 +16,16 @@
 #define AUTOWARE__TENSORRT_BEVDET__ROS_UTILS_HPP_
 
 #include <autoware_perception_msgs/msg/detected_objects.hpp>
-#include <sensor_msgs/msg/camera_info.hpp>
 #include <geometry_msgs/msg/point32.hpp>
 #include <geometry_msgs/msg/quaternion.hpp>
 #include <geometry_msgs/msg/vector3.hpp>
+#include <sensor_msgs/msg/camera_info.hpp>
 #include <tf2_geometry_msgs/tf2_geometry_msgs.hpp>
+
+#include <bevdet.h>
+
 #include <string>
 #include <vector>
-#include <bevdet.h>
 namespace autoware::tensorrt_bevdet
 {
 uint8_t getSemanticType(const std::string & class_name);
@@ -41,6 +43,5 @@ void getTransform(
 void getCameraIntrinsics(
   const sensor_msgs::msg::CameraInfo::SharedPtr msg, Eigen::Matrix3f & intrinsics);
 
-
-} // namespace autoware::tensorrt_bevdet
-#endif  // AUTOWARE__TENSORRT_BEVDET__ROS_UTILS_HPP_
\ No newline at end of file
+}  // namespace autoware::tensorrt_bevdet
+#endif  // AUTOWARE__TENSORRT_BEVDET__ROS_UTILS_HPP_
diff --git a/perception/autoware_tensorrt_bevdet/package.xml b/perception/autoware_tensorrt_bevdet/package.xml
index f73c16e811d77..b10af49221a5c 100644
--- a/perception/autoware_tensorrt_bevdet/package.xml
+++ b/perception/autoware_tensorrt_bevdet/package.xml
@@ -11,8 +11,9 @@
 
   <buildtool_depend>ament_cmake_auto</buildtool_depend>
   <buildtool_depend>autoware_cmake</buildtool_depend>
- 
+
   <depend>autoware_perception_msgs</depend>
+  <depend>bevdet_vendor</depend>
   <depend>cv_bridge</depend>
   <depend>geometry_msgs</depend>
   <depend>libopencv-dev</depend>
@@ -20,7 +21,6 @@
   <depend>rclcpp_components</depend>
   <depend>sensor_msgs</depend>
   <depend>tf2_geometry_msgs</depend>
-  <depend>bevdet_vendor</depend>
 
   <test_depend>ament_lint_auto</test_depend>
   <test_depend>autoware_lint_common</test_depend>
diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
index 13e7e38a2df97..1f917916534df 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
@@ -16,6 +16,7 @@
 // BGRCHW
 
 #include "autoware/tensorrt_bevdet/bevdet_node.hpp"
+
 #include <preprocess.h>
 
 namespace autoware
@@ -52,7 +53,8 @@ void TRTBEVDetNode::initModel()
   engine_file_ = this->declare_parameter<std::string>("engine_path", "bevdet_one_lt_d.engine");
 
   imgs_name_ = this->declare_parameter<std::vector<std::string>>("data_params.cams");
-  class_names_ = this->declare_parameter<std::vector<std::string>>("post_process_params.class_names");
+  class_names_ =
+    this->declare_parameter<std::vector<std::string>>("post_process_params.class_names");
 
   RCLCPP_INFO_STREAM(this->get_logger(), "Successful load config!");
 
@@ -235,5 +237,4 @@ TRTBEVDetNode::~TRTBEVDetNode()
 }  // namespace tensorrt_bevdet
 }  // namespace autoware
 #include <rclcpp_components/register_node_macro.hpp>
-RCLCPP_COMPONENTS_REGISTER_NODE(
-  autoware::tensorrt_bevdet::TRTBEVDetNode)
+RCLCPP_COMPONENTS_REGISTER_NODE(autoware::tensorrt_bevdet::TRTBEVDetNode)
diff --git a/perception/autoware_tensorrt_bevdet/src/ros_utils.cpp b/perception/autoware_tensorrt_bevdet/src/ros_utils.cpp
index d2a78f32722f4..1c6114634b828 100644
--- a/perception/autoware_tensorrt_bevdet/src/ros_utils.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/ros_utils.cpp
@@ -13,8 +13,8 @@
 // limitations under the License.
 
 #include "autoware/tensorrt_bevdet/ros_utils.hpp"
-#include <autoware_perception_msgs/msg/detected_object_kinematics.hpp>
 
+#include <autoware_perception_msgs/msg/detected_object_kinematics.hpp>
 #include <autoware_perception_msgs/msg/object_classification.hpp>
 #include <autoware_perception_msgs/msg/shape.hpp>
 
@@ -114,4 +114,4 @@ void getCameraIntrinsics(
   intrinsics << msg->k[0], msg->k[1], msg->k[2], msg->k[3], msg->k[4], msg->k[5], msg->k[6],
     msg->k[7], msg->k[8];
 }
-} // namespace autoware::tensorrt_bevdet
\ No newline at end of file
+}  // namespace autoware::tensorrt_bevdet

From 02f0178e9c6941a9db0576a37c2948feec5fda0a Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Tue, 19 Nov 2024 15:19:25 +0800
Subject: [PATCH 79/85] fix: add missing links in the readme.md

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 perception/autoware_tensorrt_bevdet/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/README.md b/perception/autoware_tensorrt_bevdet/README.md
index a4a181aa24886..5eaf5154bb723 100644
--- a/perception/autoware_tensorrt_bevdet/README.md
+++ b/perception/autoware_tensorrt_bevdet/README.md
@@ -11,8 +11,8 @@ The core algorithm, named `BEVDet`, it unifies multi-view images into the perspe
 <!-- cspell: ignore Junjie Huang, Guan Huang -->
 
 - Junjie Huang, Guan Huang, "BEVPoolv2: A Cutting-edge Implementation of BEVDet Toward Deployment", [[ref](https://arxiv.org/pdf/2211.17111)]
-- [bevdet_vendor]() package are copied from the [original codes](https://github.com/LCH1238/bevdet-tensorrt-cpp/tree/one) (The TensorRT, C++ implementation by LCH1238) and modified.
-- This package is ported version toward Autoware from [bevdet_vendor]().
+- [bevdet_vendor](https://github.com/autowarefoundation/bevdet_vendor) package are copied from the [original codes](https://github.com/LCH1238/bevdet-tensorrt-cpp/tree/one) (The TensorRT, C++ implementation by LCH1238) and modified.
+- This package is ported version toward Autoware from [bevdet_vendor](https://github.com/autowarefoundation/bevdet_vendor).
 
 ## Inputs / Outputs
 

From d3608e78e6f29d8be596d68c96e3217fd49371f8 Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Tue, 19 Nov 2024 16:25:24 +0800
Subject: [PATCH 80/85] fix: mark TRTBEVDetNode constructor to explicit and try
 fix spell-check error

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 .../autoware/tensorrt_bevdet/bevdet_node.hpp  |  3 +--
 .../autoware/tensorrt_bevdet/ros_utils.hpp    |  7 ++++++-
 .../src/bevdet_node.cpp                       | 20 +------------------
 .../src/ros_utils.cpp                         | 19 ++++++++++++++++++
 4 files changed, 27 insertions(+), 22 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
index 2cd90cfc0180e..5d0ab4d5ac150 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/bevdet_node.hpp
@@ -22,7 +22,6 @@
 
 #include <Eigen/Core>
 #include <Eigen/Geometry>
-#include <opencv2/opencv.hpp>
 
 #include <sensor_msgs/msg/image.hpp>
 
@@ -159,7 +158,7 @@ class TRTBEVDetNode : public rclcpp::Node
    * @brief Constructor for TRTBEVDetNode.
    * @param options The options for the node.
    */
-  TRTBEVDetNode(const rclcpp::NodeOptions & options);
+  explicit TRTBEVDetNode(const rclcpp::NodeOptions & options);
 
   /**
    * @brief Destructor for TRTBEVDetNode.
diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/ros_utils.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/ros_utils.hpp
index 7a5b400c25d2b..c804274a242a4 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/ros_utils.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/ros_utils.hpp
@@ -12,6 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+// cspell:ignore BEVDET, bevdet
+
 #ifndef AUTOWARE__TENSORRT_BEVDET__ROS_UTILS_HPP_
 #define AUTOWARE__TENSORRT_BEVDET__ROS_UTILS_HPP_
 
@@ -21,7 +23,7 @@
 #include <geometry_msgs/msg/vector3.hpp>
 #include <sensor_msgs/msg/camera_info.hpp>
 #include <tf2_geometry_msgs/tf2_geometry_msgs.hpp>
-
+#include <opencv2/opencv.hpp>
 #include <bevdet.h>
 
 #include <string>
@@ -43,5 +45,8 @@ void getTransform(
 void getCameraIntrinsics(
   const sensor_msgs::msg::CameraInfo::SharedPtr msg, Eigen::Matrix3f & intrinsics);
 
+// Convert images from OpenCV's cv:: Mat format to a specific format
+void imageTransport(std::vector<cv::Mat> imgs, uchar * out_imgs, size_t width, size_t height);
+
 }  // namespace autoware::tensorrt_bevdet
 #endif  // AUTOWARE__TENSORRT_BEVDET__ROS_UTILS_HPP_
diff --git a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
index 1f917916534df..afdbe6b212450 100644
--- a/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/bevdet_node.cpp
@@ -12,13 +12,10 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-// cspell:ignore BEVDET, thre, TRTBEV, bevdet, caminfo, intrin, Ncams, bevfeat, dlongterm, RGBHWC,
-// BGRCHW
+// cspell:ignore BEVDET, thre, TRTBEV, bevdet, caminfo, intrin, Ncams, bevfeat, dlongterm
 
 #include "autoware/tensorrt_bevdet/bevdet_node.hpp"
 
-#include <preprocess.h>
-
 namespace autoware
 {
 namespace tensorrt_bevdet
@@ -144,21 +141,6 @@ void TRTBEVDetNode::startCameraInfoSubscription()
     [this](const sensor_msgs::msg::CameraInfo::SharedPtr msg) { cameraInfoCallback(5, msg); });
 }
 
-void imageTransport(std::vector<cv::Mat> imgs, uchar * out_imgs, size_t width, size_t height)
-{
-  uchar * temp = new uchar[width * height * 3];
-  uchar * temp_gpu = nullptr;
-  CHECK_CUDA(cudaMalloc(&temp_gpu, width * height * 3));
-
-  for (size_t i = 0; i < imgs.size(); i++) {
-    cv::cvtColor(imgs[i], imgs[i], cv::COLOR_BGR2RGB);
-    CHECK_CUDA(cudaMemcpy(temp_gpu, imgs[i].data, width * height * 3, cudaMemcpyHostToDevice));
-    convert_RGBHWC_to_BGRCHW(temp_gpu, out_imgs + i * width * height * 3, 3, height, width);
-  }
-  delete[] temp;
-  CHECK_CUDA(cudaFree(temp_gpu));
-}
-
 void TRTBEVDetNode::callback(
   const sensor_msgs::msg::Image::ConstSharedPtr & msg_fl_img,
   const sensor_msgs::msg::Image::ConstSharedPtr & msg_f_img,
diff --git a/perception/autoware_tensorrt_bevdet/src/ros_utils.cpp b/perception/autoware_tensorrt_bevdet/src/ros_utils.cpp
index 1c6114634b828..d8646934e55f8 100644
--- a/perception/autoware_tensorrt_bevdet/src/ros_utils.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/ros_utils.cpp
@@ -12,11 +12,14 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+// cspell:ignore bevdet, RGBHWC, BGRCHW
+
 #include "autoware/tensorrt_bevdet/ros_utils.hpp"
 
 #include <autoware_perception_msgs/msg/detected_object_kinematics.hpp>
 #include <autoware_perception_msgs/msg/object_classification.hpp>
 #include <autoware_perception_msgs/msg/shape.hpp>
+#include <preprocess.h>
 
 namespace autoware::tensorrt_bevdet
 {
@@ -114,4 +117,20 @@ void getCameraIntrinsics(
   intrinsics << msg->k[0], msg->k[1], msg->k[2], msg->k[3], msg->k[4], msg->k[5], msg->k[6],
     msg->k[7], msg->k[8];
 }
+
+void imageTransport(std::vector<cv::Mat> imgs, uchar * out_imgs, size_t width, size_t height)
+{
+  uchar * temp = new uchar[width * height * 3];
+  uchar * temp_gpu = nullptr;
+  CHECK_CUDA(cudaMalloc(&temp_gpu, width * height * 3));
+
+  for (size_t i = 0; i < imgs.size(); i++) {
+    cv::cvtColor(imgs[i], imgs[i], cv::COLOR_BGR2RGB);
+    CHECK_CUDA(cudaMemcpy(temp_gpu, imgs[i].data, width * height * 3, cudaMemcpyHostToDevice));
+    convert_RGBHWC_to_BGRCHW(temp_gpu, out_imgs + i * width * height * 3, 3, height, width);
+  }
+  delete[] temp;
+  CHECK_CUDA(cudaFree(temp_gpu));
+}
+
 }  // namespace autoware::tensorrt_bevdet

From 960e6b71715b296ab481df1a0fd57b7e5a5fd973 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 19 Nov 2024 08:28:13 +0000
Subject: [PATCH 81/85] style(pre-commit): autofix

---
 .../include/autoware/tensorrt_bevdet/ros_utils.hpp            | 4 +++-
 perception/autoware_tensorrt_bevdet/src/ros_utils.cpp         | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/ros_utils.hpp b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/ros_utils.hpp
index c804274a242a4..8ee6491d38605 100644
--- a/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/ros_utils.hpp
+++ b/perception/autoware_tensorrt_bevdet/include/autoware/tensorrt_bevdet/ros_utils.hpp
@@ -17,13 +17,15 @@
 #ifndef AUTOWARE__TENSORRT_BEVDET__ROS_UTILS_HPP_
 #define AUTOWARE__TENSORRT_BEVDET__ROS_UTILS_HPP_
 
+#include <opencv2/opencv.hpp>
+
 #include <autoware_perception_msgs/msg/detected_objects.hpp>
 #include <geometry_msgs/msg/point32.hpp>
 #include <geometry_msgs/msg/quaternion.hpp>
 #include <geometry_msgs/msg/vector3.hpp>
 #include <sensor_msgs/msg/camera_info.hpp>
 #include <tf2_geometry_msgs/tf2_geometry_msgs.hpp>
-#include <opencv2/opencv.hpp>
+
 #include <bevdet.h>
 
 #include <string>
diff --git a/perception/autoware_tensorrt_bevdet/src/ros_utils.cpp b/perception/autoware_tensorrt_bevdet/src/ros_utils.cpp
index d8646934e55f8..78b678d4c83e3 100644
--- a/perception/autoware_tensorrt_bevdet/src/ros_utils.cpp
+++ b/perception/autoware_tensorrt_bevdet/src/ros_utils.cpp
@@ -19,6 +19,7 @@
 #include <autoware_perception_msgs/msg/detected_object_kinematics.hpp>
 #include <autoware_perception_msgs/msg/object_classification.hpp>
 #include <autoware_perception_msgs/msg/shape.hpp>
+
 #include <preprocess.h>
 
 namespace autoware::tensorrt_bevdet

From 110e975d09f23c35f8d7552a1d666cf62451acbc Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Wed, 20 Nov 2024 18:03:25 +0800
Subject: [PATCH 82/85] fix: add bevdet_vendor dependency in
 build_depends.repos

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 build_depends.repos | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/build_depends.repos b/build_depends.repos
index 0c1deb7194cfb..2bcc02c7c4db2 100644
--- a/build_depends.repos
+++ b/build_depends.repos
@@ -58,3 +58,7 @@ repositories:
     type: git
     url: https://github.com/autowarefoundation/ament_cmake.git
     version: feat/faster_ament_libraries_deduplicate
+  universe/external/bevdet_vendor:
+    type: git
+    url: https://github.com/autowarefoundation/bevdet_vendor.git
+    version: bevdet_vendor-ros2

From eeb670aa7ba1f10e7f1e03d7d9e066198137e988 Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Thu, 21 Nov 2024 15:38:21 +0800
Subject: [PATCH 83/85] fix: simplify the import check of bevdet_vendor package

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 perception/autoware_tensorrt_bevdet/CMakeLists.txt | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/CMakeLists.txt b/perception/autoware_tensorrt_bevdet/CMakeLists.txt
index e9e8c36e18bd4..bd2ffb4dbb9b5 100644
--- a/perception/autoware_tensorrt_bevdet/CMakeLists.txt
+++ b/perception/autoware_tensorrt_bevdet/CMakeLists.txt
@@ -3,14 +3,9 @@ project(autoware_tensorrt_bevdet)
 
 add_compile_options(-std=c++17)
 
-find_package(bevdet_vendor)
-if(NOT ${bevdet_vendor_FOUND})
-  message(WARNING "The bevdet_vendor package is not found. Please check its dependencies.")
-  return()
-endif()
-
 find_package(autoware_cmake REQUIRED)
 autoware_package()
+find_package(bevdet_vendor REQUIRED)
 find_package(rclcpp REQUIRED)
 find_package(rclcpp_components REQUIRED)
 find_package(Eigen3 REQUIRED)

From 45836523da178a74cc615f3fe8e807fd41e2af38 Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Thu, 21 Nov 2024 17:46:23 +0800
Subject: [PATCH 84/85] fix: modify README.md

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 perception/autoware_tensorrt_bevdet/CMakeLists.txt |  7 ++++++-
 perception/autoware_tensorrt_bevdet/README.md      | 14 ++++++++------
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/perception/autoware_tensorrt_bevdet/CMakeLists.txt b/perception/autoware_tensorrt_bevdet/CMakeLists.txt
index bd2ffb4dbb9b5..e9e8c36e18bd4 100644
--- a/perception/autoware_tensorrt_bevdet/CMakeLists.txt
+++ b/perception/autoware_tensorrt_bevdet/CMakeLists.txt
@@ -3,9 +3,14 @@ project(autoware_tensorrt_bevdet)
 
 add_compile_options(-std=c++17)
 
+find_package(bevdet_vendor)
+if(NOT ${bevdet_vendor_FOUND})
+  message(WARNING "The bevdet_vendor package is not found. Please check its dependencies.")
+  return()
+endif()
+
 find_package(autoware_cmake REQUIRED)
 autoware_package()
-find_package(bevdet_vendor REQUIRED)
 find_package(rclcpp REQUIRED)
 find_package(rclcpp_components REQUIRED)
 find_package(Eigen3 REQUIRED)
diff --git a/perception/autoware_tensorrt_bevdet/README.md b/perception/autoware_tensorrt_bevdet/README.md
index 5eaf5154bb723..b2ce948dab0ab 100644
--- a/perception/autoware_tensorrt_bevdet/README.md
+++ b/perception/autoware_tensorrt_bevdet/README.md
@@ -52,19 +52,21 @@ The core algorithm, named `BEVDet`, it unifies multi-view images into the perspe
 
    Please refer to open source repository [ros2_dataset_bridge](https://github.com/Owen-Liuyuxuan/ros2_dataset_bridge) to publish the ROS 2 topics for NuScenes dataset.
 
-## Limitation
-
-The model is trained on open-source dataset `NuScenes` and has poor generalization on its own dataset, If you want to use this model to infer your data, you need to retrain it.
-
 ## Trained Models
 
 You can download the onnx format of trained models by clicking on the links below.
 
 - BEVDet: [bevdet_one_lt_d.onnx](https://drive.google.com/file/d/1eMGJfdCVlDPBphBTjMcnIh3wdW7Q7WZB/view?usp=sharing)
 
-The model was trained in NuScenes database for 20 epochs.
+The `BEVDet` model was trained in `NuScenes` dataset for 20 epochs.
+
+## Limitation
+
+The model is trained on open-source dataset `NuScenes` and has poor generalization on its own dataset, If you want to use this model to infer your data, you need to retrain it.
+
+## Training BEVDet Model
 
-If you want to train model using the [TIER IV's internal database(~2600 key frames)](https://drive.google.com/file/d/1UaarK88HZu09sf7Ix-bEVl9zGNGFwTVL/view?usp=sharing), please refer to the following repositories:[BEVDet adapted to TIER IV dataset](https://github.com/cyn-liu/BEVDet/tree/train_export)
+If you want to train model using the [TIER IV's internal database(~2600 key frames)](https://drive.google.com/file/d/1UaarK88HZu09sf7Ix-bEVl9zGNGFwTVL/view?usp=sharing), please refer to the following repositories:[BEVDet adapted to TIER IV dataset](https://github.com/cyn-liu/BEVDet/tree/train_export).
 
 ## References/External links
 

From 56112576665fcd33b9df8b3f4e299c0492c6db64 Mon Sep 17 00:00:00 2001
From: liu cui <cynthia.liu@autocore.ai>
Date: Tue, 26 Nov 2024 09:39:27 +0800
Subject: [PATCH 85/85] fix: delete unnecessary spaces in CMakeLists.txt

Signed-off-by: liu cui <cynthia.liu@autocore.ai>
---
 perception/autoware_tensorrt_bevdet/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/perception/autoware_tensorrt_bevdet/CMakeLists.txt b/perception/autoware_tensorrt_bevdet/CMakeLists.txt
index e9e8c36e18bd4..dbdacec3f41ea 100644
--- a/perception/autoware_tensorrt_bevdet/CMakeLists.txt
+++ b/perception/autoware_tensorrt_bevdet/CMakeLists.txt
@@ -43,7 +43,7 @@ target_link_libraries(${PROJECT_NAME}_component
 rclcpp_components_register_node(${PROJECT_NAME}_component
     PLUGIN "autoware::tensorrt_bevdet::TRTBEVDetNode"
     EXECUTABLE ${PROJECT_NAME}_node
-  )
+)
 
 ament_auto_package(
   INSTALL_TO_SHARE