Skip to content

Commit

Permalink
Support features for TRT backend for phase 2 (#6)
Browse files Browse the repository at this point in the history
* Support features for TRT backend for phase 2

* Fix copyrights
  • Loading branch information
tanmayv25 authored Aug 10, 2021
1 parent 28de1ec commit d4ca619
Show file tree
Hide file tree
Showing 16 changed files with 958 additions and 266 deletions.
74 changes: 51 additions & 23 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -130,7 +130,8 @@ target_compile_features(triton-tensorrt-backend PRIVATE cxx_std_11)
# Warning/error options for the backend target.
# GCC/Clang/AppleClang: treat warnings as errors, but silence
# deprecation warnings coming from the TensorRT headers
# (-Wno-deprecated-declarations).
# MSVC: maximum warnings, target Windows 10 (_WIN32_WINNT=0x0A00),
# and standard C++ exception handling (/EHsc).
target_compile_options(
  triton-tensorrt-backend PRIVATE
  $<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
    -Wall -Wextra -Wno-unused-parameter -Wno-type-limits -Werror -Wno-deprecated-declarations>
  $<$<CXX_COMPILER_ID:MSVC>:/Wall /D_WIN32_WINNT=0x0A00 /EHsc>
)

# C/C++ defines that are used directly by this backend.
Expand All @@ -139,31 +140,47 @@ target_compile_definitions(
PRIVATE TRITON_ENABLE_GPU=1
)

set_target_properties(
triton-tensorrt-backend
PROPERTIES
POSITION_INDEPENDENT_CODE ON
OUTPUT_NAME ${TRITON_TENSORRT_BACKEND_LIBNAME}
SKIP_BUILD_RPATH TRUE
BUILD_WITH_INSTALL_RPATH TRUE
INSTALL_RPATH_USE_LINK_PATH FALSE
INSTALL_RPATH "$\{ORIGIN\}"
LINK_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libtriton_tensorrt.ldscript
LINK_FLAGS "-Wl,--version-script libtriton_tensorrt.ldscript"
)
# Properties shared by all platforms: build position-independent code,
# use the configurable output library name, and bake an $ORIGIN-relative
# RPATH into the installed library instead of the build-tree link paths.
set_target_properties(
  triton-tensorrt-backend
  PROPERTIES
    POSITION_INDEPENDENT_CODE ON
    OUTPUT_NAME ${TRITON_TENSORRT_BACKEND_LIBNAME}
    SKIP_BUILD_RPATH TRUE
    BUILD_WITH_INSTALL_RPATH TRUE
    INSTALL_RPATH_USE_LINK_PATH FALSE
    INSTALL_RPATH "$\{ORIGIN\}"
)
if (NOT WIN32)
  # The GNU linker version script restricting exported symbols is only
  # applicable on non-Windows platforms (MSVC has no --version-script).
  set_target_properties(
    triton-tensorrt-backend
    PROPERTIES
      LINK_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libtriton_tensorrt.ldscript
      LINK_FLAGS "-Wl,--version-script libtriton_tensorrt.ldscript"
  )
endif()

# Turn each user-supplied TensorRT library directory into a -L linker flag.
foreach(p ${TRITON_TENSORRT_LIB_PATHS})
  list(APPEND TRITON_TENSORRT_LDFLAGS "-L${p}")
endforeach()

# Locate the TensorRT runtime and plugin libraries on the system.
# NOTE(review): results are not checked; a not-found value expands to
# NVINFER_LIBRARY-NOTFOUND at link time — consider REQUIRED (CMake >= 3.18).
find_library(NVINFER_LIBRARY NAMES nvinfer)
find_library(NVINFER_PLUGIN_LIBRARY NAMES nvinfer_plugin)

# Use the canonical imported target for the threading library instead of
# a raw -lpthread flag so the correct flags are chosen per platform.
find_package(Threads REQUIRED)

target_link_libraries(
  triton-tensorrt-backend
  PRIVATE
    triton-core-serverapi   # from repo-core
    triton-core-serverstub  # from repo-core
    triton-backend-utils    # from repo-backend
    Threads::Threads
    ${NVINFER_LIBRARY}
    ${NVINFER_PLUGIN_LIBRARY}
)

target_link_libraries(
Expand All @@ -184,14 +201,25 @@ target_link_libraries(
include(GNUInstallDirs)
set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/TritonTensorRTBackend)

install(
TARGETS
triton-tensorrt-backend
EXPORT
triton-tensorrt-backend-targets
LIBRARY DESTINATION ${TRITON_TENSORRT_BACKEND_INSTALLDIR}
ARCHIVE DESTINATION ${TRITON_TENSORRT_BACKEND_INSTALLDIR}
)
# Install the backend library. A single install() covers both platforms:
# on Windows a shared library's .dll is installed by the RUNTIME rule,
# while on other platforms the .so is installed by the LIBRARY rule —
# destinations irrelevant to the produced artifact are simply unused.
install(
  TARGETS
    triton-tensorrt-backend
  EXPORT
    triton-tensorrt-backend-targets
  RUNTIME DESTINATION ${TRITON_TENSORRT_BACKEND_INSTALLDIR}
  LIBRARY DESTINATION ${TRITON_TENSORRT_BACKEND_INSTALLDIR}
  ARCHIVE DESTINATION ${TRITON_TENSORRT_BACKEND_INSTALLDIR}
)

install(
EXPORT
Expand Down
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<!--
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
Expand Down
2 changes: 1 addition & 1 deletion cmake/TritonTensorRTBackendConfig.cmake.in
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
Expand Down
2 changes: 1 addition & 1 deletion src/libtriton_tensorrt.ldscript
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
Expand Down
21 changes: 18 additions & 3 deletions src/loader.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
// Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -35,8 +35,8 @@ namespace triton { namespace backend { namespace tensorrt {

TRITONSERVER_Error*
LoadPlan(
const std::string& plan_path, nvinfer1::IRuntime** runtime,
nvinfer1::ICudaEngine** engine)
const std::string& plan_path, const int64_t dla_core_id,
nvinfer1::IRuntime** runtime, nvinfer1::ICudaEngine** engine)
{
// Create runtime only if it is not provided
if (*runtime == nullptr) {
Expand All @@ -47,6 +47,21 @@ LoadPlan(
}
}

// Report error if 'dla_core_id' >= number of DLA cores
if (dla_core_id != -1) {
auto dla_core_count = (*runtime)->getNbDLACores();
if (dla_core_id < dla_core_count) {
(*runtime)->setDLACore(dla_core_id);
} else {
return TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_INVALID_ARG,
(std::string("unable to create TensorRT runtime with DLA Core ID: ") +
std::to_string(dla_core_id) +
", available number of DLA cores: " + std::to_string(dla_core_count))
.c_str());
}
}

std::string model_data_str;
RETURN_IF_ERROR(ReadTextFile(plan_path, &model_data_str));
std::vector<char> model_data(model_data_str.begin(), model_data_str.end());
Expand Down
14 changes: 8 additions & 6 deletions src/loader.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
// Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -37,14 +37,16 @@ namespace triton { namespace backend { namespace tensorrt {
/// responsibility to destroy any returned runtime or engine object
/// even if an error is returned.
///
/// \param plan_path The path to the model plan file
/// \param plan_path The path to the model plan file.
/// \param dla_core_id The DLA core to use for this runtime. Does not
/// use DLA when set to -1.
/// \param runtime Returns the IRuntime object, or nullptr if failed
/// to create
/// to create.
/// \param engine Returns the ICudaEngine object, or nullptr if failed
/// to create
/// to create.
/// \return Error status.
TRITONSERVER_Error* LoadPlan(
const std::string& plan_path, nvinfer1::IRuntime** runtime,
nvinfer1::ICudaEngine** engine);
const std::string& plan_path, const int64_t dla_core_id,
nvinfer1::IRuntime** runtime, nvinfer1::ICudaEngine** engine);

}}} // namespace triton::backend::tensorrt
4 changes: 2 additions & 2 deletions src/logging.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
// Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -33,7 +33,7 @@ namespace triton { namespace backend { namespace tensorrt {
TensorRTLogger tensorrt_logger;

void
TensorRTLogger::log(Severity severity, const char* msg)
TensorRTLogger::log(Severity severity, const char* msg) noexcept
{
switch (severity) {
case Severity::kINTERNAL_ERROR:
Expand Down
4 changes: 2 additions & 2 deletions src/logging.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
// Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -31,7 +31,7 @@ namespace triton { namespace backend { namespace tensorrt {

// Logger for TensorRT API
class TensorRTLogger : public nvinfer1::ILogger {
void log(Severity severity, const char* msg) override;
void log(Severity severity, const char* msg) noexcept override;
};

extern TensorRTLogger tensorrt_logger;
Expand Down
Loading

0 comments on commit d4ca619

Please sign in to comment.