Skip to content

Commit

Permalink
Pull request/1064 (#1069)
Browse files Browse the repository at this point in the history
* Allow building Merlin with older versions of `docker`.

* Did not copy perf_analyzer.

* Comments to explain what we do there.

* Properly fix the perf_analyzer issue.

* `python-libnvinfer` is finally delivered properly on ARM64.

* Fix HugeCTR compilation issues in TF image.

---------

Co-authored-by: Matthias Langer <[email protected]>
  • Loading branch information
EmmaQiaoCh and bashimao authored Sep 21, 2023
1 parent 58ceea4 commit 64966c5
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 14 deletions.
26 changes: 13 additions & 13 deletions docker/dockerfile.merlin
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@ ARG TRITON_VERSION=23.06
# Release tag for the deep-learning framework image.
# NOTE(review): DLFW_IMAGE below interpolates TRITON_VERSION, not DLFW_VERSION, so overriding
# DLFW_VERSION alone does not change the image that is pulled - confirm whether this is intended.
ARG DLFW_VERSION=23.06

# Source images for the named stages below; each one can be overridden with --build-arg.
ARG FULL_IMAGE=nvcr.io/nvidia/tritonserver:${TRITON_VERSION}-py3
ARG SDK_IMAGE=nvcr.io/nvidia/tritonserver:${TRITON_VERSION}-py3-sdk
ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:${TRITON_VERSION}-py3-min
ARG DLFW_IMAGE=nvcr.io/nvidia/tensorflow:${TRITON_VERSION}-tf2-py3

# Named stages that later instructions reference through `COPY --from=<stage>`.
# The `AS` keyword is written in uppercase to match `FROM` (BuildKit check: FromAsCasing).
FROM ${FULL_IMAGE} AS triton
FROM ${SDK_IMAGE} AS sdk
FROM ${DLFW_IMAGE} AS dlfw
FROM ${BASE_IMAGE} AS build

Expand Down Expand Up @@ -118,8 +120,9 @@ COPY --chown=1000:1000 --from=triton /opt/tritonserver/lib lib/
COPY --chown=1000:1000 --from=triton /opt/tritonserver/include include/
COPY --chown=1000:1000 --from=triton /opt/tritonserver/repoagents/ repoagents/
COPY --chown=1000:1000 --from=triton /opt/tritonserver/backends/python backends/
# NOTE 2023-07: fil-backend is not available on ARM.
COPY --chown=1000:1000 --from=triton /opt/tritonserver/backends/fil* backends/fil/
# NOTE 2023-09: fil-backend is not available on ARM. Some docker versions flag an error if there is
# not a single source file to copy. To avoid this, we also specify a small dummy file.
COPY --chown=1000:1000 --from=triton /opt/tritonserver/LICENSE /opt/tritonserver/backends/fil/* backends/fil/
COPY --chown=1000:1000 --from=triton /usr/bin/serve /usr/bin/.

ENV PATH=/opt/tritonserver/bin:${PATH}:
Expand Down Expand Up @@ -187,11 +190,12 @@ RUN ARCH=$([ "${TARGETARCH}" = "arm64" ] && echo "sbsa" || echo "x86_64") && \
python3 \
python3-pip \
python3-dev \
python3-libnvinfer \
rapidjson-dev \
tree \
wget \
zlib1g-dev \
# Required to build RocksDB and RdKafka..
# Required to build RocksDB and RdKafka.
libgflags-dev \
libbz2-dev \
libsnappy-dev \
Expand All @@ -208,11 +212,6 @@ RUN ARCH=$([ "${TARGETARCH}" = "arm64" ] && echo "sbsa" || echo "x86_64") && \
openssh-server \
# [ HugeCTR ]
libaio-dev && \
# NOTE: libnvinfer is installed anyway, just Python bindings are missing on ARM.
if [[ "$TARGETARCH" != "arm64" ]]; then \
# TensorRT dependencies
apt install -y --no-install-recommends python3-libnvinfer \
; fi && \
apt autoremove -y && \
apt clean && \
rm -rf /var/lib/apt/lists/*
Expand All @@ -225,7 +224,7 @@ ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${JAVA_HOME}/lib:${JAVA_HOME}/lib/server
# Binaries
COPY --chown=1000:1000 --from=build /usr/local/bin/cmake /usr/local/bin/
COPY --chown=1000:1000 --from=build /usr/local/bin/pytest /usr/local/bin/
COPY --chown=1000:1000 --from=build /usr/local/bin/perf_* /usr/local/bin/
COPY --chown=1000:1000 --from=sdk /usr/local/bin/perf_* /usr/local/bin/

# Triton Server
WORKDIR /opt/tritonserver
Expand All @@ -237,8 +236,9 @@ COPY --chown=1000:1000 --from=triton /opt/tritonserver/lib lib/
COPY --chown=1000:1000 --from=triton /opt/tritonserver/include include/
COPY --chown=1000:1000 --from=triton /opt/tritonserver/repoagents/ repoagents/
COPY --chown=1000:1000 --from=triton /opt/tritonserver/backends/python backends/python/
# NOTE 2023-07: fil-backend is not available on ARM.
COPY --chown=1000:1000 --from=triton /opt/tritonserver/backends/fil* backends/fil/
# NOTE 2023-09: fil-backend is not available on ARM. Some docker versions flag an error if there is
# not a single source file to copy. To avoid this, we also specify a small dummy file.
COPY --chown=1000:1000 --from=triton /opt/tritonserver/LICENSE /opt/tritonserver/backends/fil/* backends/fil/
COPY --chown=1000:1000 --from=triton /opt/tritonserver/backends/tensorrt backends/tensorrt/
COPY --chown=1000:1000 --from=triton /usr/bin/serve /usr/bin/.
COPY --chown=1000:1000 --from=triton /usr/lib/*-linux-gnu/libdcgm.so.2 /tmp
Expand Down Expand Up @@ -362,7 +362,7 @@ ENV PATH=${PATH}:${HADOOP_HOME}/bin:${HADOOP_HOME}/sbin \
YARN_NODEMANAGER_USER=root \
# Works around ThreadReaper stack overflow issues: https://bugs.openjdk.java.net/browse/JDK-8153057
LIBHDFS_OPTS='-Djdk.lang.processReaperUseDefaultStackSize=true' \
# Tackles with JVM setting error signals that UCX library will check (GitLab issue #425).
# Works around the JVM setting error signals that the UCX library checks (GitLab issue #425).
UCX_ERROR_SIGNALS='' \
CLASSPATH=${CLASSPATH}:\
${HADOOP_HOME}/etc/hadoop/*:\
Expand All @@ -389,7 +389,7 @@ ENV PATH=$PATH:${HUGECTR_HOME}/bin \
LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${HUGECTR_HOME}/lib

RUN if [ "${HUGECTR_DEV_MODE}" == "false" ]; then \
# Install HugeCTR inference which is dependency for hps_backenc
# Install HugeCTR inference, which is a dependency of hps_backend
git clone --branch ${HUGECTR_VER} --depth 1 https://${_CI_JOB_TOKEN}${_HUGECTR_REPO} /hugectr && \
cd /hugectr && \
git submodule update --init --recursive && \
Expand Down
4 changes: 3 additions & 1 deletion docker/dockerfile.tf
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ ARG _CI_JOB_TOKEN=""
ARG HUGECTR_VER=main

ENV LD_LIBRARY_PATH=/usr/local/lib/python${PYTHON_VERSION}/dist-packages/tensorflow:$LD_LIBRARY_PATH \
LIBRARY_PATH=${HUGECTR_HOME}/lib:$LIBRARY_PATH \
SOK_COMPILE_UNIT_TEST=ON

RUN mkdir -p /usr/local/nvidia/lib64 && \
Expand All @@ -55,6 +54,9 @@ ARG INSTALL_DISTRIBUTED_EMBEDDINGS=false
ARG TFDE_VER=v23.03.00
RUN if [ "$HUGECTR_DEV_MODE" == "false" ]; then \
export HUGECTR_HOME=/usr/local/hugectr && \
rm -rf ${HUGECTR_HOME}/lib/libgmock* ${HUGECTR_HOME}/lib/pkgconfig/gmock* ${HUGECTR_HOME}/include/gmock && \
rm -rf ${HUGECTR_HOME}/lib/libgtest* ${HUGECTR_HOME}/lib/pkgconfig/gtest* ${HUGECTR_HOME}/include/gtest && \
git clone --branch ${HUGECTR_VER} --depth 1 --recurse-submodules --shallow-submodules https://${_CI_JOB_TOKEN}${_HUGECTR_REPO} /hugectr && \
pushd /hugectr && \
rm -rf .git/modules && \
Expand Down

0 comments on commit 64966c5

Please sign in to comment.