diff --git a/README.md b/README.md
index 05288d8..b5e9a23 100644
--- a/README.md
+++ b/README.md
@@ -22,8 +22,8 @@ and running the CLI from within the latest corresponding `tritonserver`
 container image, which should have all necessary system dependencies installed.
 
 For vLLM and TRT-LLM, you can use their respective images:
-- `nvcr.io/nvidia/tritonserver:24.05-vllm-python-py3`
-- `nvcr.io/nvidia/tritonserver:24.05-trtllm-python-py3`
+- `nvcr.io/nvidia/tritonserver:24.06-vllm-python-py3`
+- `nvcr.io/nvidia/tritonserver:24.06-trtllm-python-py3`
 
 If you decide to run the CLI on the host or in a custom image, please see
 this list of [additional dependencies](#additional-dependencies-for-custom-environments)
@@ -38,6 +38,7 @@ matrix below:
 
 | Triton CLI Version | TRT-LLM Version | Triton Container Tag |
 |:------------------:|:---------------:|:--------------------:|
+| 0.0.9 | v0.10.0 | 24.06 |
 | 0.0.8 | v0.9.0 | 24.05 |
 | 0.0.7 | v0.9.0 | 24.04 |
 | 0.0.6 | v0.8.0 | 24.02, 24.03 |
@@ -55,7 +56,7 @@ It is also possible to install from a specific branch name, a commit hash
 or a tag name. For example to install `triton_cli` with a specific tag:
 
 ```bash
-GIT_REF="0.0.8"
+GIT_REF="0.0.9"
 pip install git+https://github.com/triton-inference-server/triton_cli.git@${GIT_REF}
 ```
 
@@ -90,7 +91,7 @@ triton -h
 triton import -m gpt2
 
 # Start server pointing at the default model repository
-triton start --image nvcr.io/nvidia/tritonserver:24.05-vllm-python-py3
+triton start --image nvcr.io/nvidia/tritonserver:24.06-vllm-python-py3
 
 # Infer with CLI
 triton infer -m gpt2 --prompt "machine learning is"
@@ -144,10 +145,10 @@ docker run -ti \
   --shm-size=1g --ulimit memlock=-1 \
   -v ${HOME}/models:/root/models \
   -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
-  nvcr.io/nvidia/tritonserver:24.05-vllm-python-py3
+  nvcr.io/nvidia/tritonserver:24.06-vllm-python-py3
 
 # Install the Triton CLI
-pip install git+https://github.com/triton-inference-server/triton_cli.git@0.0.8
+pip install git+https://github.com/triton-inference-server/triton_cli.git@0.0.9
 
 # Authenticate with huggingface for restricted models like Llama-2 and Llama-3
 huggingface-cli login
@@ -213,10 +214,10 @@ docker run -ti \
   -v /tmp:/tmp \
   -v ${HOME}/models:/root/models \
   -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
-  nvcr.io/nvidia/tritonserver:24.05-trtllm-python-py3
+  nvcr.io/nvidia/tritonserver:24.06-trtllm-python-py3
 
 # Install the Triton CLI
-pip install git+https://github.com/triton-inference-server/triton_cli.git@0.0.8
+pip install git+https://github.com/triton-inference-server/triton_cli.git@0.0.9
 
 # Authenticate with huggingface for restricted models like Llama-2 and Llama-3
 huggingface-cli login
diff --git a/src/triton_cli/__init__.py b/src/triton_cli/__init__.py
index c1ab939..ed8df84 100644
--- a/src/triton_cli/__init__.py
+++ b/src/triton_cli/__init__.py
@@ -24,4 +24,4 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-__version__ = "0.0.9dev"
+__version__ = "0.0.9"
diff --git a/src/triton_cli/docker/Dockerfile b/src/triton_cli/docker/Dockerfile
index 2a91ad5..9de6c9a 100644
--- a/src/triton_cli/docker/Dockerfile
+++ b/src/triton_cli/docker/Dockerfile
@@ -3,7 +3,7 @@ FROM nvcr.io/nvidia/tritonserver:24.06-trtllm-python-py3
 
 # Setup vLLM Triton backend
 RUN mkdir -p /opt/tritonserver/backends/vllm && \
-    wget -P /opt/tritonserver/backends/vllm https://raw.githubusercontent.com/triton-inference-server/vllm_backend/main/src/model.py
+    wget -P /opt/tritonserver/backends/vllm https://raw.githubusercontent.com/triton-inference-server/vllm_backend/r24.06/src/model.py
 
 # vLLM runtime dependencies
 RUN pip install "vllm==0.4.3"
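Verification (a minimal sketch, assuming the `0.0.9` tag referenced in this diff is published; the `triton_cli.__version__` attribute comes from the `__init__.py` change above):

```bash
# Install the tagged release from GitHub, as documented in the README changes
pip install git+https://github.com/triton-inference-server/triton_cli.git@0.0.9

# Confirm the installed package reports the release version set in src/triton_cli/__init__.py
python -c "import triton_cli; print(triton_cli.__version__)"  # expected output: 0.0.9
```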