diff --git a/CMakeLists.txt b/CMakeLists.txt index 6909cdc..15ff842 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ #============================================================================= -# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -24,8 +24,12 @@ set(TRITON_REPO_ORGANIZATION "https://github.com/triton-inference-server" CACHE set(TRITON_COMMON_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/common repo") set(TRITON_CORE_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/core repo") set(TRITON_BACKEND_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/backend repo") -set(RAPIDS_TRITON_REPO_TAG "main" CACHE STRING "Tag for rapidsai/rapids-triton repo") + +# Specify *minimum* version for all RAPIDS dependencies +# Some RAPIDS deps may have later versions +set(RAPIDS_DEPENDENCIES_VERSION "24.10" CACHE STRING "RAPIDS projects dependencies version") set(RAPIDS_TRITON_REPO_PATH "https://github.com/rapidsai/rapids-triton.git" CACHE STRING "Git repository to pull rapids_triton from") +set(RAPIDS_TRITON_REPO_TAG "branch-${RAPIDS_DEPENDENCIES_VERSION}" CACHE STRING "Tag for rapidsai/rapids-triton repo") if(TRITON_FIL_DOCKER_BUILD) project(RAPIDS_TRITON_BACKEND VERSION 22.10.00) @@ -45,6 +49,7 @@ if(TRITON_FIL_DOCKER_BUILD) --build-arg TRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG} --build-arg TRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG} --build-arg TRITON_BACKEND_REPO_TAG=${TRITON_BACKEND_REPO_TAG} + --build-arg RAPIDS_DEPENDENCIES_VERSION=${RAPIDS_DEPENDENCIES_VERSION} --build-arg RAPIDS_TRITON_REPO_TAG=${RAPIDS_TRITON_REPO_TAG} --build-arg RAPIDS_TRITON_REPO_PATH=${RAPIDS_TRITON_REPO_PATH} -f ${CMAKE_CURRENT_LIST_DIR}/ops/Dockerfile @@ -76,7 +81,7 @@ else() ############################################################################## # - Prepare rapids-cmake ----------------------------------------------------- file(DOWNLOAD - https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-24.04/RAPIDS.cmake + https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-24.10/RAPIDS.cmake ${CMAKE_BINARY_DIR}/RAPIDS.cmake) include(${CMAKE_BINARY_DIR}/RAPIDS.cmake) include(rapids-cmake) @@ -95,9 +100,6 @@ else() option(DISABLE_DEPRECATION_WARNINGS "Disable depreaction warnings " ON) option(NVTX "Enable nvtx markers" OFF) set(BACKEND_FOLDER "/opt/tritonserver/backends" CACHE STRING "Triton backend folder path") - # Specify *minimum* version for all RAPIDS dependencies - # Some RAPIDS deps may have later versions - set(RAPIDS_DEPENDENCIES_VERSION "24.04" CACHE STRING "RAPIDS projects dependencies version") option(TRITON_FIL_USE_TREELITE_STATIC "Link Treelite statically in libtriton_fil.so and cuml++.so" ON) diff --git a/build.sh b/build.sh index f2951d5..beca203 100755 --- a/build.sh +++ b/build.sh @@ -36,34 +36,35 @@ HELP="$0 [ ...] [ ...] default action (no args) is to build all targets The following environment variables are also accepted to allow further customization: - BASE_IMAGE - Base image for Docker images, or build image for build.py - TRITON_VERSION - Triton version to use for build - SERVER_TAG - The tag to use for the server image - TEST_TAG - The tag to use for the test image - CONDA_DEV_TAG - The tag of the image containing dev Conda env; if set, build.sh - will attempt to leverage the pre-built Conda env to speed up - the build the server image - CONDA_TEST_TAG - The tag of the image containing test Conda env; if set, build.sh - will attempt to leverage the pre-built Conda env to speed up - the build the test image - PREBUILT_IMAGE - A server image to be tested (used as base of test image) - TRITON_REF - Commit ref for Triton when using build.py - COMMON_REF - Commit ref for Triton common repo when using build.py - CORE_REF - Commit ref for Triton core repo when using build.py - BACKEND_REF - Commit ref for Triton backend repo when using build.py - THIRDPARTY_REF - Commit ref for Triton third-party repos when using build.py - JOB_ID - A unique id to use for this build job - USE_CLIENT_WHEEL - If 1, Triton Python client will be installed from wheel - distributed in a Triton SDK image. - SDK_IMAGE - If set, client wheel will be copied from this image. - Otherwise, if USE_CLIENT_WHEEL is 1, use SDK image - corresponding to TRITON_VERSION - BUILDPY_BRANCH - Instead of autodetecting the current branch of the FIL - backend repo, use this branch when building with - build.py. For all other build methods, the backend will - simply be built with the current version of the code - TREELITE_STATIC - If ON, Treelite will be statically linked into the binaries - RAPIDS_VERSION - The version of RAPIDS to require for RAPIDS dependencies + BASE_IMAGE - Base image for Docker images, or build image for build.py + TRITON_VERSION - Triton version to use for build + SERVER_TAG - The tag to use for the server image + TEST_TAG - The tag to use for the test image + CONDA_DEV_TAG - The tag of the image containing dev Conda env; if set, build.sh + will attempt to leverage the pre-built Conda env to speed up + the build the server image + CONDA_TEST_TAG - The tag of the image containing test Conda env; if set, build.sh + will attempt to leverage the pre-built Conda env to speed up + the build the test image + PREBUILT_IMAGE - A server image to be tested (used as base of test image) + TRITON_REF - Commit ref for Triton when using build.py + COMMON_REF - Commit ref for Triton common repo when using build.py + CORE_REF - Commit ref for Triton core repo when using build.py + BACKEND_REF - Commit ref for Triton backend repo when using build.py + THIRDPARTY_REF - Commit ref for Triton third-party repos when using build.py + JOB_ID - A unique id to use for this build job + USE_CLIENT_WHEEL - If 1, Triton Python client will be installed from wheel + distributed in a Triton SDK image. + SDK_IMAGE - If set, client wheel will be copied from this image. + Otherwise, if USE_CLIENT_WHEEL is 1, use SDK image + corresponding to TRITON_VERSION + BUILDPY_BRANCH - Instead of autodetecting the current branch of the FIL + backend repo, use this branch when building with + build.py. For all other build methods, the backend will + simply be built with the current version of the code + TREELITE_STATIC - If ON, Treelite will be statically linked into the binaries + RAPIDS_VERSION - The version of RAPIDS to require for RAPIDS dependencies + RAPIDS_TRITON_REPO_TAG - Commit ref for RAPIDS-Triton " BUILD_TYPE=Release @@ -167,7 +168,7 @@ DOCKER_ARGS="$DOCKER_ARGS --build-arg TRITON_ENABLE_GPU=${TRITON_ENABLE_GPU}" if [ -z $RAPIDS_VERSION ] then - RAPIDS_VERSION=23.12 + RAPIDS_VERSION=24.10 else DOCKER_ARGS="$DOCKER_ARGS --build-arg RAPIDS_DEPENDENCIES_VERSION=${RAPIDS_VERSION}" fi @@ -214,6 +215,11 @@ else [ ! -z $THIRDPARTY_REF ] || THIRDPARTY_REF='main' fi +if [ ! -z $RAPIDS_TRITON_REPO_TAG ] +then + DOCKER_ARGS="$DOCKER_ARGS --build-arg RAPIDS_TRITON_REPO_TAG=${RAPIDS_TRITON_REPO_TAG}" +fi + if [ ! -z $SDK_IMAGE ] then USE_CLIENT_WHEEL=1 diff --git a/cmake/thirdparty/get_rapids-triton.cmake b/cmake/thirdparty/get_rapids-triton.cmake index 0126708..af42fea 100644 --- a/cmake/thirdparty/get_rapids-triton.cmake +++ b/cmake/thirdparty/get_rapids-triton.cmake @@ -1,5 +1,5 @@ #============================================================================= -# Copyright (c) 2021, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -34,21 +34,7 @@ function(find_and_configure_rapids_triton) ) endfunction() -# Change pinned tag here to test a commit in CI -# To use a different RAFT locally, set the CMake variable -# CPM_raft_SOURCE=/path/to/local/raft -set (RAPIDS_FORK https://github.com/rapidsai/rapids-triton.git) -set (REPO_TAG branch-${RAPIDS_DEPENDENCIES_VERSION}) -message(STATUS "Setting repo tag to ${REPO_TAG} for rapids fork ${RAPIDS_FORK}") -# if Triton tag and organization is non-default, change the fork and repo tag used -# for rapids -if (NOT RAPIDS_TRITON_REPO_PATH STREQUAL RAPIDS_FORK) - set (RAPIDS_FORK ${RAPIDS_TRITON_REPO_PATH}) - set (REPO_TAG ${RAPIDS_TRITON_REPO_TAG}) - message(STATUS "Re-setting repo tag to ${REPO_TAG} for rapids fork ${RAPIDS_FORK}") -endif() - find_and_configure_rapids_triton(VERSION ${RAPIDS_DEPENDENCIES_VERSION} - FORK ${RAPIDS_FORK} - PINNED_TAG ${REPO_TAG} + FORK ${RAPIDS_TRITON_REPO_PATH} + PINNED_TAG ${RAPIDS_TRITON_REPO_TAG} ) diff --git a/conda/environments/triton_benchmark.yml b/conda/environments/triton_benchmark.yml index d1a4966..6f193ae 100644 --- a/conda/environments/triton_benchmark.yml +++ b/conda/environments/triton_benchmark.yml @@ -15,4 +15,4 @@ dependencies: - pip: - tritonclient[all] - protobuf - - git+https://github.com/rapidsai/rapids-triton.git@branch-24.04#subdirectory=python + - git+https://github.com/rapidsai/rapids-triton.git@branch-24.10#subdirectory=python diff --git a/conda/environments/triton_test.yml b/conda/environments/triton_test.yml index 85d4a0b..cae091e 100644 --- a/conda/environments/triton_test.yml +++ b/conda/environments/triton_test.yml @@ -22,4 +22,4 @@ dependencies: - pip: - tritonclient[all] - protobuf - - git+https://github.com/rapidsai/rapids-triton.git@branch-24.04#subdirectory=python + - git+https://github.com/rapidsai/rapids-triton.git@branch-24.10#subdirectory=python diff --git a/ops/Dockerfile b/ops/Dockerfile index 31ff4d3..0af8948 100644 --- a/ops/Dockerfile +++ b/ops/Dockerfile @@ -63,7 +63,7 @@ RUN conda run --no-capture-output -n triton_test \ FROM wheel-install-${USE_CLIENT_WHEEL} as conda-test RUN conda run --no-capture-output -n triton_test \ - pip install git+https://github.com/rapidsai/rapids-triton.git@branch-24.04#subdirectory=python + pip install git+https://github.com/rapidsai/rapids-triton.git@branch-24.10#subdirectory=python RUN conda-pack --ignore-missing-files -n triton_test -o /tmp/env.tar \ && mkdir /conda/test/ \ && cd /conda/test/ \ @@ -150,7 +150,7 @@ ENV TRITON_ENABLE_GPU=$TRITON_ENABLE_GPU # Specify *minimum* version for all RAPIDS dependencies # Some RAPIDS deps may have later versions -ARG RAPIDS_DEPENDENCIES_VERSION=24.04 +ARG RAPIDS_DEPENDENCIES_VERSION=24.10 ENV RAPIDS_DEPENDENCIES_VERSION=$RAPIDS_DEPENDENCIES_VERSION ARG TRITON_FIL_USE_TREELITE_STATIC=ON diff --git a/qa/L0_e2e/test_model.py b/qa/L0_e2e/test_model.py index d2d9453..30723a2 100644 --- a/qa/L0_e2e/test_model.py +++ b/qa/L0_e2e/test_model.py @@ -225,7 +225,7 @@ def model_data(request, client, model_repo): config = client.get_model_config(name) input_shapes = {input_.name: list(input_.dims) for input_ in config.input} output_sizes = { - output.name: np.product(output.dims) * np.dtype("float32").itemsize + output.name: np.prod(output.dims) * np.dtype("float32").itemsize for output in config.output } max_batch_size = config.max_batch_size