diff --git a/CMakeLists.txt b/CMakeLists.txt index 315f036..07744ac 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -229,6 +229,8 @@ if (${TRITON_PYTORCH_DOCKER_BUILD}) COMMAND docker cp pytorch_backend_ptlib:/usr/local/lib/python3.10/dist-packages/torch/lib/libtorch_cuda_linalg.so libtorch_cuda_linalg.so COMMAND docker cp pytorch_backend_ptlib:/usr/local/lib/python3.10/dist-packages/torch/lib/libtorch_global_deps.so libtorch_global_deps.so COMMAND docker cp pytorch_backend_ptlib:/usr/local/lib/python3.10/dist-packages/torch/lib/libcaffe2_nvrtc.so libcaffe2_nvrtc.so + # TODO: Revisit when not needed by making it part of cuda base container. + COMMAND docker cp -L pytorch_backend_ptlib:/usr/local/cuda/lib64/libcusparseLt.so libcusparseLt.so COMMAND docker cp pytorch_backend_ptlib:/usr/local/lib/libtorchvision.so libtorchvision.so COMMAND /bin/sh -c "if [ ${TRITON_PYTORCH_ENABLE_TORCHTRT} = 'ON' ]; then docker cp pytorch_backend_ptlib:/usr/local/lib/python3.10/dist-packages/torch_tensorrt/lib/libtorchtrt_runtime.so libtorchtrt_runtime.so; fi" COMMAND docker cp pytorch_backend_ptlib:/usr/local/lib/python3.10/dist-packages/torch_tensorrt/bin/torchtrtc torchtrtc || echo "error ignored..." || true @@ -434,6 +436,7 @@ if (${TRITON_PYTORCH_DOCKER_BUILD}) install( FILES ${PT_LIB_PATHS} + ${CMAKE_CURRENT_BINARY_DIR}/libcusparseLt.so ${CMAKE_CURRENT_BINARY_DIR}/LICENSE.pytorch DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/pytorch ) @@ -474,6 +477,7 @@ if (${TRITON_PYTORCH_DOCKER_BUILD}) COMMAND ln -sf libopencv_flann.so libopencv_flann.so.${OPENCV_VERSION} COMMAND ln -sf libpng16.so libpng16.so.16 COMMAND ln -sf libjpeg.so libjpeg.so.8 + COMMAND ln -sf libcusparseLt.so libcusparseLt.so.0 RESULT_VARIABLE LINK_STATUS WORKING_DIRECTORY ${CMAKE_INSTALL_PREFIX}/backends/pytorch) if(LINK_STATUS AND NOT LINK_STATUS EQUAL 0) diff --git a/README.md b/README.md index 731a7c3..8ed211f 100644 --- a/README.md +++ b/README.md @@ -146,11 +146,11 @@ key: "INFERENCE_MODE" * `DISABLE_CUDNN`: Boolean flag to disable the cuDNN library. By default, cuDNN is enabled. -[cuDNN](https://developer.nvidia.com/cudnn) is a GPU-accelerated library of primitives for +[cuDNN](https://developer.nvidia.com/cudnn) is a GPU-accelerated library of primitives for deep neural networks. cuDNN provides highly tuned implementations for standard routines. Typically, models run with cuDNN enabled are faster. However there are some exceptions -where using cuDNN can be slower, cause higher memory usage or result in errors. +where using cuDNN can be slower, cause higher memory usage or result in errors. The section of model config file specifying this parameter will look like: