-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
729 changed files
with
235,562 additions
and
1 deletion.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
[workspace] | ||
members = [ | ||
"benchmark", | ||
"router", | ||
"router/client", | ||
"router/grpc-metadata", | ||
"launcher" | ||
] | ||
resolver = "2" | ||
|
||
[workspace.package] | ||
version = "2.0.2" | ||
edition = "2021" | ||
authors = ["Olivier Dehaene"] | ||
homepage = "https://github.com/huggingface/text-generation-inference" | ||
|
||
[workspace.dependencies] | ||
tokenizers = { version = "0.19.1", features = ["http"] } | ||
hf-hub = { version = "0.3.1", features = ["tokio"] } | ||
|
||
# Release profile shared by every workspace member. | ||
[profile.release] | ||
# Keep a limited amount of debug information in release binaries. | ||
debug = 1 | ||
# Enable incremental compilation for this profile. | ||
incremental = true | ||
# "fat" link-time optimization across the whole dependency graph. | ||
lto = "fat" | ||
# Highest optimization level. | ||
opt-level = 3 | ||
# Compile each crate as a single codegen unit. | ||
codegen-units = 1 | ||
# Abort the process on panic instead of unwinding. | ||
panic = "abort" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
# Rust builder | ||
FROM lukemathwalker/cargo-chef:latest-rust-1.75 AS chef | ||
WORKDIR /usr/src | ||
|
||
# Planner stage: collect the workspace manifests and member sources, then let cargo-chef write the dependency recipe | ||
FROM chef AS planner | ||
COPY Cargo.toml rust-toolchain.toml ./ | ||
COPY proto ./proto | ||
COPY benchmark ./benchmark | ||
COPY router ./router | ||
COPY launcher ./launcher | ||
RUN cargo chef prepare --recipe-path recipe.json | ||
|
||
FROM chef AS builder | ||
|
||
RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ | ||
curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ | ||
unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ | ||
unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ | ||
rm -f $PROTOC_ZIP | ||
|
||
COPY --from=planner /usr/src/recipe.json recipe.json | ||
COPY Cargo.lock Cargo.lock | ||
RUN cargo chef cook --release --recipe-path recipe.json | ||
|
||
COPY Cargo.toml Cargo.toml | ||
COPY rust-toolchain.toml rust-toolchain.toml | ||
COPY proto proto | ||
COPY benchmark benchmark | ||
COPY router router | ||
COPY launcher launcher | ||
RUN cargo build --release | ||
|
||
# Text Generation Inference base image | ||
FROM vault.habana.ai/gaudi-docker/1.16.0/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest as base | ||
|
||
# Text Generation Inference base env | ||
ENV HUGGINGFACE_HUB_CACHE=/data \ | ||
HF_HUB_ENABLE_HF_TRANSFER=1 \ | ||
PORT=80 | ||
|
||
# libssl.so.1.1 is not installed on Ubuntu 22.04 by default, install it | ||
# The downloaded .deb is removed in the same RUN so it is not kept in the image layer. | ||
RUN wget http://nz2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2_amd64.deb && \ | ||
dpkg -i ./libssl1.1_1.1.1f-1ubuntu2_amd64.deb && \ | ||
rm -f ./libssl1.1_1.1.1f-1ubuntu2_amd64.deb | ||
|
||
WORKDIR /usr/src | ||
|
||
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ | ||
libssl-dev \ | ||
ca-certificates \ | ||
make \ | ||
curl \ | ||
&& rm -rf /var/lib/apt/lists/* | ||
|
||
# Install server | ||
COPY proto proto | ||
COPY server server | ||
COPY server/Makefile server/Makefile | ||
# Generate the gRPC stubs, then install the Python requirements, the patched dill, DeepSpeed and the server package. | ||
# Every pip call passes --no-cache-dir so pip's download cache is not stored in the image layer. | ||
# NOTE(review): the DeepSpeed ref was garbled to "[email protected]" in this copy (e-mail obfuscation artifact). | ||
# "[email protected]" is assumed from the 1.16.0 Gaudi base image version - confirm against the upstream file. | ||
RUN cd server && \ | ||
make gen-server && \ | ||
pip install --no-cache-dir -r requirements.txt && \ | ||
bash ./dill-0.3.8-patch.sh && \ | ||
pip install --no-cache-dir git+https://github.com/HabanaAI/[email protected] && \ | ||
pip install . --no-cache-dir | ||
|
||
# Install benchmarker | ||
COPY --from=builder /usr/src/target/release/text-generation-benchmark /usr/local/bin/text-generation-benchmark | ||
# Install router | ||
COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router | ||
# Install launcher | ||
COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher | ||
|
||
RUN python3 -m pip install --upgrade transformers accelerate | ||
|
||
# Final image | ||
FROM base | ||
|
||
ENTRYPOINT ["text-generation-launcher"] | ||
CMD ["--json-output"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,173 @@ | ||
# Rust builder | ||
FROM lukemathwalker/cargo-chef:latest-rust-1.75 AS chef | ||
WORKDIR /usr/src | ||
|
||
ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse | ||
|
||
# Planner stage: collect the workspace manifests and member sources, then let cargo-chef write the dependency recipe | ||
FROM chef AS planner | ||
COPY Cargo.toml rust-toolchain.toml ./ | ||
COPY proto ./proto | ||
COPY benchmark ./benchmark | ||
COPY router ./router | ||
COPY launcher ./launcher | ||
RUN cargo chef prepare --recipe-path recipe.json | ||
|
||
# Builder stage: installs protoc, pre-builds dependencies from the cargo-chef recipe, then builds the workspace | ||
FROM chef AS builder | ||
|
||
# Download the protoc 21.12 release archive and unpack the compiler binary and its include files into /usr/local | ||
RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ | ||
curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ | ||
unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ | ||
unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ | ||
rm -f $PROTOC_ZIP | ||
|
||
COPY --from=planner /usr/src/recipe.json recipe.json | ||
RUN cargo chef cook --release --recipe-path recipe.json | ||
|
||
# Declared only after the dependency layers: every RUN that follows an ARG sees it as an environment | ||
# variable, so a changed value would otherwise invalidate the cached protoc and dependency builds. | ||
ARG GIT_SHA | ||
ARG DOCKER_LABEL | ||
|
||
COPY Cargo.toml Cargo.toml | ||
COPY rust-toolchain.toml rust-toolchain.toml | ||
COPY proto proto | ||
COPY benchmark benchmark | ||
COPY router router | ||
COPY launcher launcher | ||
RUN cargo build --release | ||
|
||
# Text Generation Inference base image for ROCm | ||
FROM rocm/dev-ubuntu-22.04:5.7 as base | ||
|
||
# System packages for the base image, listed alphabetically. | ||
# hipblas-dev, hipsparse-dev and rocthrust-dev are needed to build VLLM & flash. | ||
RUN apt-get update \ | ||
&& DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ | ||
build-essential \ | ||
ca-certificates \ | ||
ccache \ | ||
curl \ | ||
g++ \ | ||
git \ | ||
hipblas-dev \ | ||
hipsparse-dev \ | ||
libssl-dev \ | ||
make \ | ||
rocthrust-dev \ | ||
&& rm -rf /var/lib/apt/lists/* | ||
|
||
# Keep in sync with `server/pyproject.toml` | ||
ARG MAMBA_VERSION=23.1.0-1 | ||
ARG PYTORCH_VERSION='2.2.0.dev0' | ||
ARG ROCM_VERSION='5.7' | ||
ARG PYTHON_VERSION='3.10.10' | ||
# Automatically set by buildx | ||
ARG TARGETPLATFORM | ||
# Put the conda install prefix first on PATH (key=value form; the space-separated ENV form is legacy) | ||
ENV PATH=/opt/conda/bin:$PATH | ||
|
||
# TGI seems to require libssl.so.1.1 instead of libssl.so.3 so we can't use ubuntu 22.04. Ubuntu 20.04 has python==3.8, and TGI requires python>=3.9, hence the need for miniconda. | ||
# Install mamba | ||
# translating Docker's TARGETPLATFORM into mamba arches | ||
# Download, install and delete the installer in one RUN so the script is not kept in an image layer. | ||
# The installer is run through bash, so no chmod is needed; curl writes to the single -o target. | ||
RUN case ${TARGETPLATFORM} in \ | ||
"linux/arm64") MAMBA_ARCH=aarch64 ;; \ | ||
*) MAMBA_ARCH=x86_64 ;; \ | ||
esac && \ | ||
curl -fsSL -o ~/mambaforge.sh "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" && \ | ||
bash ~/mambaforge.sh -b -p /opt/conda && \ | ||
mamba init && \ | ||
rm ~/mambaforge.sh | ||
|
||
# Install PyTorch 2.2 RC compiled against ROCm 5.7, as VLLM can not be compiled with ROCm 5.6. | ||
# --no-cache-dir keeps pip's download cache out of the image layer. | ||
RUN pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/test/rocm5.7/ | ||
|
||
FROM base AS kernel-builder | ||
|
||
# Build vllm kernels | ||
FROM kernel-builder AS vllm-builder | ||
WORKDIR /usr/src | ||
|
||
COPY server/Makefile-vllm Makefile | ||
|
||
# Build specific version of vllm | ||
RUN make build-vllm-rocm | ||
|
||
# Build Flash Attention v2 kernels | ||
FROM kernel-builder AS flash-att-v2-builder | ||
WORKDIR /usr/src | ||
|
||
COPY server/Makefile-flash-att-v2 Makefile | ||
|
||
# Build specific version of flash attention v2 | ||
RUN make build-flash-attention-v2-rocm | ||
|
||
# Custom Transformers kernels (gpt-neox and bloom), built with PYTORCH_ROCM_ARCH set to gfx90a | ||
FROM kernel-builder AS custom-kernels-builder | ||
WORKDIR /usr/src | ||
COPY server/custom_kernels/ ./ | ||
RUN PYTORCH_ROCM_ARCH="gfx90a" python setup.py build | ||
|
||
# exllama kernels, built with PYTORCH_ROCM_ARCH set to gfx90a | ||
FROM kernel-builder AS exllama-kernels-builder | ||
WORKDIR /usr/src | ||
COPY server/exllama_kernels/ ./ | ||
|
||
RUN PYTORCH_ROCM_ARCH=gfx90a python setup.py build | ||
|
||
# exllama v2 kernels, built with PYTORCH_ROCM_ARCH set to gfx90a | ||
FROM kernel-builder AS exllamav2-kernels-builder | ||
WORKDIR /usr/src | ||
COPY server/exllamav2_kernels/ ./ | ||
|
||
RUN PYTORCH_ROCM_ARCH=gfx90a python setup.py build | ||
|
||
FROM base as base-copy | ||
|
||
# Text Generation Inference base env | ||
ENV HUGGINGFACE_HUB_CACHE=/data \ | ||
HF_HUB_ENABLE_HF_TRANSFER=1 \ | ||
PORT=80 | ||
|
||
# Copy builds artifacts from vllm builder | ||
COPY --from=vllm-builder /usr/src/vllm/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages | ||
|
||
# Copy build artifacts from flash attention v2 builder | ||
COPY --from=flash-att-v2-builder /usr/src/flash-attention-v2/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages | ||
|
||
# Copy build artifacts from custom kernels builder | ||
COPY --from=custom-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages | ||
|
||
# Copy build artifacts from exllama kernels builder | ||
COPY --from=exllama-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages | ||
|
||
# Copy build artifacts from exllamav2 kernels builder | ||
COPY --from=exllamav2-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages | ||
|
||
# Install flash-attention dependencies | ||
RUN pip install einops --no-cache-dir | ||
|
||
# Install server | ||
COPY proto proto | ||
COPY server server | ||
COPY server/Makefile server/Makefile | ||
RUN cd server && \ | ||
make gen-server && \ | ||
pip install -r requirements_rocm.txt && \ | ||
pip install ".[accelerate, peft, outlines]" --no-cache-dir | ||
|
||
# Install benchmarker | ||
COPY --from=builder /usr/src/target/release/text-generation-benchmark /usr/local/bin/text-generation-benchmark | ||
# Install router | ||
COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router | ||
# Install launcher | ||
COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher | ||
|
||
# AWS Sagemaker compatible image | ||
FROM base-copy as sagemaker | ||
COPY sagemaker-entrypoint.sh entrypoint.sh | ||
RUN chmod +x entrypoint.sh | ||
|
||
ENTRYPOINT ["./entrypoint.sh"] | ||
|
||
# Final image | ||
FROM base-copy | ||
|
||
ENTRYPOINT ["text-generation-launcher"] | ||
CMD ["--json-output"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
FROM lukemathwalker/cargo-chef:latest-rust-1.75 AS chef | ||
WORKDIR /usr/src | ||
|
||
ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse | ||
|
||
# Planner stage: collect the workspace manifests and member sources, then let cargo-chef write the dependency recipe | ||
FROM chef AS planner | ||
COPY Cargo.toml rust-toolchain.toml ./ | ||
COPY proto ./proto | ||
COPY benchmark ./benchmark | ||
COPY router ./router | ||
COPY launcher ./launcher | ||
RUN cargo chef prepare --recipe-path recipe.json | ||
|
||
# Builder stage: installs protoc, pre-builds dependencies from the cargo-chef recipe, then builds the workspace | ||
FROM chef AS builder | ||
|
||
# Download the protoc 21.12 release archive and unpack the compiler binary and its include files into /usr/local | ||
RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ | ||
curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ | ||
unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ | ||
unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ | ||
rm -f $PROTOC_ZIP | ||
|
||
COPY --from=planner /usr/src/recipe.json recipe.json | ||
RUN cargo chef cook --release --recipe-path recipe.json | ||
|
||
# Declared only after the dependency layers: every RUN that follows an ARG sees it as an environment | ||
# variable, so a changed value would otherwise invalidate the cached protoc and dependency builds. | ||
ARG GIT_SHA | ||
ARG DOCKER_LABEL | ||
|
||
COPY Cargo.toml Cargo.toml | ||
COPY rust-toolchain.toml rust-toolchain.toml | ||
COPY proto proto | ||
COPY benchmark benchmark | ||
COPY router router | ||
COPY launcher launcher | ||
RUN cargo build --release | ||
|
||
|
||
# Text Generation Inference base image for Intel | ||
FROM intel/intel-extension-for-pytorch:2.1.10-xpu as base | ||
|
||
USER root | ||
# libssl.so.1.1 is not installed on Ubuntu 22.04 by default, install it | ||
# The downloaded .deb is removed in the same RUN so it is not kept in the image layer. | ||
RUN wget http://nz2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2_amd64.deb && \ | ||
dpkg -i ./libssl1.1_1.1.1f-1ubuntu2_amd64.deb && \ | ||
rm -f ./libssl1.1_1.1.1f-1ubuntu2_amd64.deb | ||
|
||
|
||
RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \ | ||
| gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list | ||
|
||
# Install the oneAPI base kit and build tooling from the repository configured above. | ||
# Uses apt-get (stable CLI for scripts) in non-interactive mode and drops the package lists in the same layer. | ||
# Recommended packages are still installed, as before, because intel-basekit may rely on them. | ||
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y intel-basekit xpu-smi cmake python3-dev ninja-build && \ | ||
rm -rf /var/lib/apt/lists/* | ||
|
||
# Text Generation Inference base env | ||
ENV HUGGINGFACE_HUB_CACHE=/data \ | ||
HF_HUB_ENABLE_HF_TRANSFER=1 \ | ||
PORT=80 | ||
|
||
|
||
WORKDIR /usr/src | ||
# Build pytorch and ipex | ||
# These two steps only fetch the sources; the `python setup.py install` builds run in later RUN steps, after the oneAPI ENV variables are set. | ||
# NOTE(review): intel-extension-for-pytorch is checked out at the head of origin/xpu-main, not at a fixed commit, so rebuilds may pick up different sources. | ||
# pytorch is checked out at a fixed commit and patched with the 00*.patch files shipped in the ipex checkout. | ||
RUN git clone https://github.com/intel/intel-extension-for-pytorch && cd intel-extension-for-pytorch && git checkout -b xpu_main origin/xpu-main | ||
RUN git clone https://github.com/pytorch/pytorch.git && cd pytorch && git checkout 209f2fa8ff86652f67d75c2f19bf9cb9942fd018 && git apply /usr/src/intel-extension-for-pytorch/torch_patches/00*.patch | ||
|
||
# Install server | ||
COPY proto proto | ||
COPY server server | ||
COPY server/Makefile server/Makefile | ||
RUN cd server && \ | ||
make gen-server && \ | ||
pip install -r requirements_cuda.txt && \ | ||
pip install ".[accelerate, peft, outlines]" --no-cache-dir | ||
|
||
ENV CCL_ROOT=/opt/intel/oneapi/ccl/latest | ||
ENV I_MPI_ROOT=/opt/intel/oneapi/mpi/latest | ||
ENV FI_PROVIDER_PATH=/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/lib/prov:/usr/lib/x86_64-linux-gnu/libfabric | ||
ENV DIAGUTIL_PATH=/opt/intel/oneapi/compiler/latest/etc/compiler/sys_check/sys_check.sh | ||
ENV CCL_CONFIGURATION=cpu_gpu_dpcpp | ||
ENV MANPATH=/opt/intel/oneapi/mpi/latest/share/man:/opt/intel/oneapi/mpi/latest/share/man:/opt/intel/oneapi/compiler/latest/share/man | ||
ENV CMAKE_PREFIX_PATH=/opt/intel/oneapi/mkl/latest/lib/cmake:/opt/intel/oneapi/compiler/latest | ||
ENV CMPLR_ROOT=/opt/intel/oneapi/compiler/latest | ||
ENV LIBRARY_PATH=/opt/intel/oneapi/mpi/latest/lib:/opt/intel/oneapi/ccl/latest/lib/:/opt/intel/oneapi/mkl/latest/lib/:/opt/intel/oneapi/compiler/latest/lib | ||
ENV OCL_ICD_FILENAMES=libintelocl_emu.so:libalteracl.so:/opt/intel/oneapi/compiler/latest/lib/libintelocl.so | ||
ENV CLASSPATH=/opt/intel/oneapi/mpi/latest/share/java/mpi.jar:/opt/intel/oneapi/mpi/latest/share/java/mpi.jar | ||
ENV LD_LIBRARY_PATH=/opt/intel/oneapi/ccl/latest/lib/:/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/lib:/opt/intel/oneapi/mpi/latest/lib:/opt/intel/oneapi/mkl/latest/lib:/opt/intel/oneapi/compiler/latest/opt/compiler/lib:/opt/intel/oneapi/compiler/latest/lib:/opt/intel/oneapi/lib:/opt/intel/oneapi/lib/intel64: | ||
ENV MKLROOT=/opt/intel/oneapi/mkl/latest | ||
ENV NLSPATH=/opt/intel/oneapi/mkl/latest/share/locale/%l_%t/%N:/opt/intel/oneapi/compiler/latest/lib/locale/%l_%t/%N | ||
ENV PATH=/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/bin:/opt/intel/oneapi/mpi/latest/bin:/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/bin:/opt/intel/oneapi/mkl/latest/bin/:/opt/intel/oneapi/compiler/latest/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin | ||
ENV CPATH=/opt/intel/oneapi/mpi/latest/include:/opt/intel/oneapi/ccl/latest/include:/opt/intel/oneapi/mkl/latest/include | ||
ENV CCL_ZE_IPC_EXCHANGE=sockets | ||
|
||
|
||
RUN pip uninstall -y torch && cd pytorch && git submodule update --init --recursive && python setup.py install | ||
RUN pip uninstall -y intel-extension-for-pytorch && cd intel-extension-for-pytorch && git submodule update --init --recursive && USE_AOT_DEVLIST='pvc' BUILD_SEPARATE_OPS=ON BUILD_WITH_CPU=ON USE_XETLA=ON python setup.py install | ||
|
||
# Install benchmarker | ||
COPY --from=builder /usr/src/target/release/text-generation-benchmark /usr/local/bin/text-generation-benchmark | ||
# Install router | ||
COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router | ||
# Install launcher | ||
COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher | ||
|
||
# Final image | ||
FROM base | ||
|
||
ENTRYPOINT ["text-generation-launcher"] | ||
CMD ["--json-output"] |
Oops, something went wrong.