diff --git a/Dockerfile.sdk b/Dockerfile.sdk
index 34991e860df..cc51483ae9e 100644
--- a/Dockerfile.sdk
+++ b/Dockerfile.sdk
@@ -29,12 +29,14 @@
 #
 
 # Base image on the minimum Triton container
-ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:24.06-py3-min
+ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:24.07-py3-min
 
 ARG TRITON_CLIENT_REPO_SUBDIR=clientrepo
 ARG TRITON_REPO_ORGANIZATION=http://github.com/triton-inference-server
+ARG TRITON_PA_REPO_SUBDIR=perfanalyzerrepo
 ARG TRITON_COMMON_REPO_TAG=main
 ARG TRITON_CORE_REPO_TAG=main
+ARG TRITON_CLIENT_REPO_TAG=main
 ARG TRITON_THIRD_PARTY_REPO_TAG=main
 ARG TRITON_MODEL_ANALYZER_REPO_TAG=main
 ARG TRITON_ENABLE_GPU=ON
@@ -104,8 +106,10 @@ RUN rm -f /usr/bin/python && \
 # Build the client library and examples
 ARG TRITON_REPO_ORGANIZATION
 ARG TRITON_CLIENT_REPO_SUBDIR
+ARG TRITON_PA_REPO_SUBDIR
 ARG TRITON_COMMON_REPO_TAG
 ARG TRITON_CORE_REPO_TAG
+ARG TRITON_CLIENT_REPO_TAG
 ARG TRITON_THIRD_PARTY_REPO_TAG
 ARG TRITON_ENABLE_GPU
 ARG JAVA_BINDINGS_MAVEN_VERSION
@@ -115,26 +119,53 @@ ARG TARGETPLATFORM
 WORKDIR /workspace
 COPY TRITON_VERSION .
 COPY ${TRITON_CLIENT_REPO_SUBDIR} client
+COPY ${TRITON_PA_REPO_SUBDIR} perf_analyzer
 
-WORKDIR /workspace/build
+WORKDIR /workspace/client_build
 RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
       -DTRITON_VERSION=`cat /workspace/TRITON_VERSION` \
       -DTRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION} \
       -DTRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG} \
       -DTRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG} \
       -DTRITON_THIRD_PARTY_REPO_TAG=${TRITON_THIRD_PARTY_REPO_TAG} \
+      -DTRITON_ENABLE_PERF_ANALYZER=OFF \
       -DTRITON_ENABLE_CC_HTTP=ON -DTRITON_ENABLE_CC_GRPC=ON \
-      -DTRITON_ENABLE_PYTHON_HTTP=ON -DTRITON_ENABLE_PYTHON_GRPC=ON \
+      -DTRITON_ENABLE_PYTHON_HTTP=OFF -DTRITON_ENABLE_PYTHON_GRPC=OFF \
       -DTRITON_ENABLE_JAVA_HTTP=ON \
-      -DTRITON_ENABLE_PERF_ANALYZER=ON \
+      -DTRITON_ENABLE_EXAMPLES=ON -DTRITON_ENABLE_TESTS=ON \
+      -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} /workspace/client
+RUN make -j16 cc-clients java-clients && \
+    rm -fr ~/.m2
+
+# TODO: PA will rebuild the CC clients since it depends on them.
+# This should be optimized so that we do not have to build
+# the CC clients twice. Similarly, because the SDK expectation is
+# that PA is packaged with the python client, we hold off on building
+# the python client until now. Post-migration we should focus
+# effort on de-tangling these flows.
+WORKDIR /workspace/pa_build
+RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
+      -DTRITON_VERSION=`cat /workspace/TRITON_VERSION` \
+      -DTRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION} \
+      -DTRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG} \
+      -DTRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG} \
+      -DTRITON_CLIENT_REPO_TAG=${TRITON_CLIENT_REPO_TAG} \
       -DTRITON_ENABLE_PERF_ANALYZER_C_API=ON \
       -DTRITON_ENABLE_PERF_ANALYZER_TFS=ON \
       -DTRITON_ENABLE_PERF_ANALYZER_TS=ON \
       -DTRITON_ENABLE_PERF_ANALYZER_OPENAI=ON \
-      -DTRITON_ENABLE_EXAMPLES=ON -DTRITON_ENABLE_TESTS=ON \
-      -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} /workspace/client
-RUN make -j16 cc-clients python-clients java-clients && \
-    rm -fr ~/.m2
+      -DTRITON_ENABLE_CC_HTTP=ON \
+      -DTRITON_ENABLE_CC_GRPC=ON \
+      -DTRITON_ENABLE_PYTHON_HTTP=ON \
+      -DTRITON_ENABLE_PYTHON_GRPC=ON \
+      -DTRITON_PACKAGE_PERF_ANALYZER=ON \
+      -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} \
+      /workspace/perf_analyzer
+RUN make -j16 perf-analyzer python-clients
+
+RUN pip3 install build \
+    && cd /workspace/perf_analyzer/genai-perf \
+    && python3 -m build --wheel --outdir /workspace/install/python
 
 # Install Java API Bindings
 RUN if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
@@ -145,9 +176,6 @@ RUN if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
       --jar-install-path /workspace/install/java-api-bindings; \
     fi
 
-RUN pip3 install build \
-    && cd /workspace/client/src/c++/perf_analyzer/genai-perf \
-    && python3 -m build --wheel --outdir /workspace/install/python
 ############################################################################
 ## Create sdk container
 ############################################################################
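The Dockerfile.sdk change above splits the old single client build into two passes: a C++/Java-only client build with perf_analyzer disabled, then a standalone perf_analyzer build that also produces the python clients and the genai-perf wheel. A minimal sketch of how the new build args might be supplied, assuming the client and perf_analyzer repos are checked out next to Dockerfile.sdk under the default subdir names (the tag values here are illustrative, not taken from the PR):

    # Hypothetical invocation; subdir names match the ARG defaults above.
    docker build -f Dockerfile.sdk \
      --build-arg TRITON_CLIENT_REPO_SUBDIR=clientrepo \
      --build-arg TRITON_PA_REPO_SUBDIR=perfanalyzerrepo \
      --build-arg TRITON_CLIENT_REPO_TAG=main \
      -t tritonserver-sdk .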
diff --git a/Dockerfile.win10.min b/Dockerfile.win10.min
index 7d954d62de0..0a554fbcf4c 100644
--- a/Dockerfile.win10.min
+++ b/Dockerfile.win10.min
@@ -1,4 +1,4 @@
-# Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -37,9 +37,9 @@ RUN choco install unzip -y
 #
 # Installing TensorRT
 #
-ARG TENSORRT_VERSION=10.0.1.6
-ARG TENSORRT_ZIP="TensorRT-${TENSORRT_VERSION}.Windows10.x86_64.cuda-12.4.zip"
-ARG TENSORRT_SOURCE=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.1/zip/TensorRT-10.0.1.6.Windows10.win10.cuda-12.4.zip
+ARG TENSORRT_VERSION=10.2.0.19
+ARG TENSORRT_ZIP="TensorRT-${TENSORRT_VERSION}.Windows10.x86_64.cuda-12.5.zip"
+ARG TENSORRT_SOURCE=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.2.0/zip/TensorRT-10.2.0.19.Windows10.x86_64.cuda-12.5.zip
 # COPY ${TENSORRT_ZIP} /tmp/${TENSORRT_ZIP}
 ADD ${TENSORRT_SOURCE} /tmp/${TENSORRT_ZIP}
 RUN unzip /tmp/%TENSORRT_ZIP%
@@ -51,9 +51,9 @@ LABEL TENSORRT_VERSION="${TENSORRT_VERSION}"
 #
 # Installing cuDNN
 #
-ARG CUDNN_VERSION=9.1.0.70
+ARG CUDNN_VERSION=9.2.1.18
 ARG CUDNN_ZIP=cudnn-windows-x86_64-${CUDNN_VERSION}_cuda12-archive.zip
-ARG CUDNN_SOURCE=https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/windows-x86_64/cudnn-windows-x86_64-9.1.0.70_cuda12-archive.zip
+ARG CUDNN_SOURCE=https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/windows-x86_64/cudnn-windows-x86_64-9.2.1.18_cuda12-archive.zip
 ADD ${CUDNN_SOURCE} /tmp/${CUDNN_ZIP}
 RUN unzip /tmp/%CUDNN_ZIP%
 RUN move cudnn-* cudnn
@@ -88,7 +88,7 @@ LABEL PYTHON_VERSION=${PYTHON_VERSION}
 #
 # Installing CMake
 #
-ARG CMAKE_VERSION=3.29.3
+ARG CMAKE_VERSION=3.30.0
 RUN pip install cmake==%CMAKE_VERSION%
 
 ENV CMAKE_TOOLCHAIN_FILE /vcpkg/scripts/buildsystems/vcpkg.cmake
@@ -150,7 +150,7 @@ WORKDIR /
 #
 ARG CUDA_MAJOR=12
 ARG CUDA_MINOR=5
-ARG CUDA_PATCH=0
+ARG CUDA_PATCH=1
 ARG CUDA_VERSION=${CUDA_MAJOR}.${CUDA_MINOR}.${CUDA_PATCH}
 ARG CUDA_PACKAGES="nvcc_${CUDA_MAJOR}.${CUDA_MINOR} \
                    cudart_${CUDA_MAJOR}.${CUDA_MINOR} \
@@ -175,7 +175,7 @@ RUN copy "%CUDA_INSTALL_ROOT_WP%\extras\visual_studio_integration\MSBuildExtensi
 
 RUN setx PATH "%CUDA_INSTALL_ROOT_WP%\bin;%PATH%"
 
-ARG CUDNN_VERSION=9.1.0.70
+ARG CUDNN_VERSION=9.2.1.18
 ENV CUDNN_VERSION ${CUDNN_VERSION}
 COPY --from=dependency_base /cudnn /cudnn
 RUN copy cudnn\bin\cudnn*.dll "%CUDA_INSTALL_ROOT_WP%\bin\."
@@ -183,7 +183,7 @@ RUN copy cudnn\lib\x64\cudnn*.lib "%CUDA_INSTALL_ROOT_WP%\lib\x64\."
 RUN copy cudnn\include\cudnn*.h "%CUDA_INSTALL_ROOT_WP%\include\."
 LABEL CUDNN_VERSION="${CUDNN_VERSION}"
 
-ARG TENSORRT_VERSION=10.0.1.6
+ARG TENSORRT_VERSION=10.2.0.19
 ENV TRT_VERSION ${TENSORRT_VERSION}
 COPY --from=dependency_base /TensorRT /TensorRT
 RUN setx PATH "c:\TensorRT\lib;%PATH%"
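Note that the TENSORRT_SOURCE and CUDNN_SOURCE download URLs in the diff hardcode the version string, so each *_VERSION arg only takes effect together with its matching *_SOURCE URL. A sketch of overriding the pair at build time (the URL shown is the one from the diff; any other version would need its corresponding URL):

    # Hypothetical override; TENSORRT_SOURCE must stay in sync with
    # TENSORRT_VERSION, which is declared in both build stages.
    docker build -f Dockerfile.win10.min \
      --build-arg TENSORRT_VERSION=10.2.0.19 \
      --build-arg TENSORRT_SOURCE=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.2.0/zip/TensorRT-10.2.0.19.Windows10.x86_64.cuda-12.5.zip \
      -t tritonserver-win10-min .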
diff --git a/README.md b/README.md
index 38b4759c489..17628b4f035 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@